Trait encode_unicode::CharExt [−][src]
Extension trait for char
that adds methods for converting to and from UTF-8 or UTF-16.
Required methods
fn to_utf8(self) -> Utf8Char
[src]
Get the UTF-8 representation of this codepoint.
Utf8Char
is to [u8;4]
what char
is to u32
:
a restricted type that cannot be mutated internally.
fn to_utf16(self) -> Utf16Char
[src]
Get the UTF-16 representation of this codepoint.
Utf16Char
is to [u16;2]
what char
is to u32
:
a restricted type that cannot be mutated internally.
fn iter_utf8_bytes(self) -> Utf8IteratorⓘNotable traits for Utf8Iterator
impl Iterator for Utf8Iterator type Item = u8;impl Read for Utf8Iterator
[src]
Notable traits for Utf8Iterator
impl Iterator for Utf8Iterator type Item = u8;impl Read for Utf8Iterator
Iterate over or read the one to four bytes in the UTF-8 representation of this codepoint.
An identical alternative to the unstable char.encode_utf8()
.
That method somehow still exists on stable, so I have to use a different name.
fn iter_utf16_units(self) -> Utf16IteratorⓘNotable traits for Utf16Iterator
impl Iterator for Utf16Iterator type Item = u16;
[src]
Notable traits for Utf16Iterator
impl Iterator for Utf16Iterator type Item = u16;
Iterate over the one or two units in the UTF-16 representation of this codepoint.
An identical alternative to the unstable char.encode_utf16()
.
That method somehow still exists on stable, so I have to use a different name.
fn to_utf8_array(self) -> ([u8; 4], usize)
[src]
Convert this char to a UTF-8 array, and also return how many bytes of the array are used.
The returned array is left-aligned with unused bytes set to zero.
fn to_utf16_array(self) -> [u16; 2]
[src]
Convert this char
to UTF-16.
The second element is non-zero when a surrogate pair is required.
Examples
use encode_unicode::CharExt; assert_eq!('@'.to_utf16_array(), ['@' as u16, 0]); assert_eq!('睷'.to_utf16_array(), ['睷' as u16, 0]); assert_eq!('\u{abcde}'.to_utf16_array(), [0xda6f, 0xdcde]);
fn to_utf16_tuple(self) -> (u16, Option<u16>)
[src]
Convert this char
to UTF-16.
The second item is Some
if a surrogate pair is required.
Examples
use encode_unicode::CharExt; assert_eq!('@'.to_utf16_tuple(), ('@' as u16, None)); assert_eq!('睷'.to_utf16_tuple(), ('睷' as u16, None)); assert_eq!('\u{abcde}'.to_utf16_tuple(), (0xda6f, Some(0xdcde)));
fn from_utf8_slice_start(src: &[u8]) -> Result<(Self, usize), InvalidUtf8Slice>
[src]
Create a char
from the start of an UTF-8 slice,
and also return how many bytes were used.
Errors
Returns an Err
if the slice is empty, doesn’t start with a valid
UTF-8 sequence or is too short for the sequence.
Examples
use encode_unicode::CharExt; use encode_unicode::error::InvalidUtf8Slice::*; use encode_unicode::error::InvalidUtf8::*; assert_eq!(char::from_utf8_slice_start(&[b'A', b'B', b'C']), Ok(('A',1))); assert_eq!(char::from_utf8_slice_start(&[0xdd, 0xbb]), Ok(('\u{77b}',2))); assert_eq!(char::from_utf8_slice_start(&[]), Err(TooShort(1))); assert_eq!(char::from_utf8_slice_start(&[0xf0, 0x99]), Err(TooShort(4))); assert_eq!(char::from_utf8_slice_start(&[0xee, b'F', 0x80]), Err(Utf8(NotAContinuationByte(1)))); assert_eq!(char::from_utf8_slice_start(&[0xee, 0x99, 0x0f]), Err(Utf8(NotAContinuationByte(2))));
fn from_utf16_slice_start(
src: &[u16]
) -> Result<(Self, usize), InvalidUtf16Slice>
[src]
src: &[u16]
) -> Result<(Self, usize), InvalidUtf16Slice>
Create a char
from the start of an UTF-16 slice,
and also return how many units were used.
If you want to continue after an error, continue with the next u16
unit.
fn from_utf8_array(utf8: [u8; 4]) -> Result<Self, InvalidUtf8Array>
[src]
Convert an UTF-8 sequence as returned from .to_utf8_array()
into a char.
The codepoint must start at the first byte, and leftover bytes are ignored.
Errors
Returns an Err
if the array doesn’t start with a valid UTF-8 sequence.
Examples
use encode_unicode::CharExt; use encode_unicode::error::InvalidUtf8Array::*; use encode_unicode::error::InvalidUtf8::*; use encode_unicode::error::InvalidCodepoint::*; assert_eq!(char::from_utf8_array([b'A', 0, 0, 0]), Ok('A')); assert_eq!(char::from_utf8_array([0xf4, 0x8b, 0xbb, 0xbb]), Ok('\u{10befb}')); assert_eq!(char::from_utf8_array([b'A', b'B', b'C', b'D']), Ok('A')); assert_eq!(char::from_utf8_array([0, 0, 0xcc, 0xbb]), Ok('\0')); assert_eq!(char::from_utf8_array([0xef, b'F', 0x80, 0x80]), Err(Utf8(NotAContinuationByte(1)))); assert_eq!(char::from_utf8_array([0xc1, 0x80, 0, 0]), Err(Utf8(OverLong))); assert_eq!(char::from_utf8_array([0xf7, 0xaa, 0x99, 0x88]), Err(Codepoint(TooHigh)));
fn from_utf16_array(utf16: [u16; 2]) -> Result<Self, InvalidUtf16Array>
[src]
Convert a UTF-16 pair as returned from .to_utf16_array()
into a char
.
The second element is ignored when not required.
Examples
use encode_unicode::CharExt; use encode_unicode::error::InvalidUtf16Array; assert_eq!(char::from_utf16_array(['x' as u16, 'y' as u16]), Ok('x')); assert_eq!(char::from_utf16_array(['睷' as u16, 0]), Ok('睷')); assert_eq!(char::from_utf16_array([0xda6f, 0xdcde]), Ok('\u{abcde}')); assert_eq!(char::from_utf16_array([0xf111, 0xdbad]), Ok('\u{f111}')); assert_eq!(char::from_utf16_array([0xdaaf, 0xdaaf]), Err(InvalidUtf16Array::SecondIsNotTrailingSurrogate)); assert_eq!(char::from_utf16_array([0xdcac, 0x9000]), Err(InvalidUtf16Array::FirstIsTrailingSurrogate));
fn from_utf16_tuple(
utf16: (u16, Option<u16>)
) -> Result<Self, InvalidUtf16Tuple>
[src]
utf16: (u16, Option<u16>)
) -> Result<Self, InvalidUtf16Tuple>
Convert a UTF-16 pair as returned from .to_utf16_tuple()
into a char
.
unsafe fn from_utf8_exact_slice_unchecked(src: &[u8]) -> Self
[src]
Convert an UTF-8 sequence into a char.
The length of the slice is taken as length of the sequence; it should be 1,2,3 or 4.
Safety
The slice must contain exactly one, valid, UTF-8 sequence.
Passing a slice that produces an invalid codepoint is always undefined behavior; later checks that the codepoint is valid can be removed by the compiler.
Panics
If the slice is empty.
fn from_utf16_array_unchecked(utf16: [u16; 2]) -> Self
[src]
Convert a UTF-16 array as returned from .to_utf16_array()
into a
char
.
This function is safe because it avoids creating invalid codepoints, but the returned value might not be what one expected.
Examples
use encode_unicode::CharExt; // starts with a trailing surrogate - converted as if it was a valid // surrogate pair anyway. assert_eq!(char::from_utf16_array_unchecked([0xdbad, 0xf19e]), '\u{fb59e}'); // missing trailing surrogate - ditto assert_eq!(char::from_utf16_array_unchecked([0xd802, 0]), '\u{10800}');
unsafe fn from_utf16_tuple_unchecked(utf16: (u16, Option<u16>)) -> Self
[src]
Convert a UTF-16 tuple as returned from .to_utf16_tuple()
into a char
.
fn from_u32_detailed(c: u32) -> Result<Self, InvalidCodepoint>
[src]
Produces more detailed errors than char::from_u32().
Errors
This function will return an error if
- the value is greater than 0x10ffff
- the value is between 0xd800 and 0xdfff (inclusive)
Examples
use encode_unicode::CharExt; use encode_unicode::error::InvalidCodepoint; assert_eq!(char::from_u32_detailed(0x41), Ok('A')); assert_eq!(char::from_u32_detailed(0x40_00_00), Err(InvalidCodepoint::TooHigh)); assert_eq!(char::from_u32_detailed(0xd951), Err(InvalidCodepoint::Utf16Reserved)); assert_eq!(char::from_u32_detailed(0xdddd), Err(InvalidCodepoint::Utf16Reserved)); assert_eq!(char::from_u32_detailed(0xdd), Ok('Ý')); assert_eq!(char::from_u32_detailed(0x1f331), Ok('🌱'));
Implementations on Foreign Types
impl CharExt for char
[src]
fn to_utf8(self) -> Utf8Char
[src]
fn iter_utf8_bytes(self) -> Utf8IteratorⓘNotable traits for Utf8Iterator
impl Iterator for Utf8Iterator type Item = u8;impl Read for Utf8Iterator
[src]
Notable traits for Utf8Iterator
impl Iterator for Utf8Iterator type Item = u8;impl Read for Utf8Iterator
fn to_utf8_array(self) -> ([u8; 4], usize)
[src]
fn from_utf8_slice_start(src: &[u8]) -> Result<(Self, usize), InvalidUtf8Slice>
[src]
fn from_utf8_array(utf8: [u8; 4]) -> Result<Self, InvalidUtf8Array>
[src]
unsafe fn from_utf8_exact_slice_unchecked(src: &[u8]) -> Self
[src]
fn to_utf16(self) -> Utf16Char
[src]
fn iter_utf16_units(self) -> Utf16IteratorⓘNotable traits for Utf16Iterator
impl Iterator for Utf16Iterator type Item = u16;
[src]
Notable traits for Utf16Iterator
impl Iterator for Utf16Iterator type Item = u16;
fn to_utf16_array(self) -> [u16; 2]
[src]
fn to_utf16_tuple(self) -> (u16, Option<u16>)
[src]
fn from_utf16_slice_start(
src: &[u16]
) -> Result<(Self, usize), InvalidUtf16Slice>
[src]
src: &[u16]
) -> Result<(Self, usize), InvalidUtf16Slice>
fn from_utf16_array(utf16: [u16; 2]) -> Result<Self, InvalidUtf16Array>
[src]
fn from_utf16_tuple(
utf16: (u16, Option<u16>)
) -> Result<Self, InvalidUtf16Tuple>
[src]
utf16: (u16, Option<u16>)
) -> Result<Self, InvalidUtf16Tuple>