Struct encode_unicode::Utf8Char [−][src]
A Unicode codepoint stored as UTF-8.
It can be borrowed as a str
, and has the same size as char
.
Implementations
impl Utf8Char
[src][−]
pub fn from_str_start(src: &str) -> Result<(Self, usize), EmptyStrError>
[src][−]
Create a Utf8Char
from the first codepoint in a str
.
Returns an error if the str
is empty.
Examples
use encode_unicode::Utf8Char;
assert_eq!(Utf8Char::from_str_start("a"), Ok((Utf8Char::from('a'),1)));
assert_eq!(Utf8Char::from_str_start("ab"), Ok((Utf8Char::from('a'),1)));
assert_eq!(Utf8Char::from_str_start("🂠 "), Ok((Utf8Char::from('🂠'),4)));
assert_eq!(Utf8Char::from_str_start("e\u{301}"), Ok((Utf8Char::from('e'),1))); // 'e' followed by the U+0301 combining mark
assert!(Utf8Char::from_str_start("").is_err());
pub fn from_slice_start(src: &[u8]) -> Result<(Self, usize), InvalidUtf8Slice>
[src][−]
Create a Utf8Char
from the first codepoint in a UTF-8 slice.
Also returns the length of the UTF-8 sequence for the codepoint.
If the slice is from a str
, use ::from_str_start()
to skip UTF-8 validation.
Errors
Returns an Err
if the slice is empty, doesn’t start with a valid
UTF-8 sequence or is too short for the sequence.
Examples
use encode_unicode::Utf8Char; use encode_unicode::error::InvalidUtf8Slice::*; use encode_unicode::error::InvalidUtf8::*; assert_eq!(Utf8Char::from_slice_start(&[b'A', b'B', b'C']), Ok((Utf8Char::from('A'),1))); assert_eq!(Utf8Char::from_slice_start(&[0xdd, 0xbb]), Ok((Utf8Char::from('\u{77b}'),2))); assert_eq!(Utf8Char::from_slice_start(&[]), Err(TooShort(1))); assert_eq!(Utf8Char::from_slice_start(&[0xf0, 0x99]), Err(TooShort(4))); assert_eq!(Utf8Char::from_slice_start(&[0xee, b'F', 0x80]), Err(Utf8(NotAContinuationByte(1)))); assert_eq!(Utf8Char::from_slice_start(&[0xee, 0x99, 0x0f]), Err(Utf8(NotAContinuationByte(2))));
pub unsafe fn from_slice_start_unchecked(src: &[u8]) -> (Self, usize)
[src][−]
A from_slice_start()
that doesn’t validate the codepoint.
Safety
The slice must be non-empty and start with a valid UTF-8 codepoint.
Invalid or incomplete values might cause reads of uninitialized memory.
pub fn from_array(utf8: [u8; 4]) -> Result<Self, InvalidUtf8Array>
[src][−]
Create a Utf8Char
from a byte array after validating it.
The codepoint must start at the first byte.
Unused bytes are set to zero by this function and so can be anything.
Errors
Returns an Err
if the array doesn’t start with a valid UTF-8 sequence.
Examples
use encode_unicode::Utf8Char; use encode_unicode::error::InvalidUtf8Array::*; use encode_unicode::error::InvalidUtf8::*; use encode_unicode::error::InvalidCodepoint::*; assert_eq!(Utf8Char::from_array([b'A', 0, 0, 0]), Ok(Utf8Char::from('A'))); assert_eq!(Utf8Char::from_array([0xf4, 0x8b, 0xbb, 0xbb]), Ok(Utf8Char::from('\u{10befb}'))); assert_eq!(Utf8Char::from_array([b'A', b'B', b'C', b'D']), Ok(Utf8Char::from('A'))); assert_eq!(Utf8Char::from_array([0, 0, 0xcc, 0xbb]), Ok(Utf8Char::from('\0'))); assert_eq!(Utf8Char::from_array([0xef, b'F', 0x80, 0x80]), Err(Utf8(NotAContinuationByte(1)))); assert_eq!(Utf8Char::from_array([0xc1, 0x80, 0, 0]), Err(Utf8(OverLong))); assert_eq!(Utf8Char::from_array([0xf7, 0xaa, 0x99, 0x88]), Err(Codepoint(TooHigh)));
pub unsafe fn from_array_unchecked(utf8: [u8; 4]) -> Self
[src][−]
Zero-cost constructor.
Safety
Must contain a valid codepoint starting at the first byte, with the
unused bytes zeroed.
Bad values can easily lead to undefined behavior.
pub fn from_ascii(ascii: u8) -> Result<Self, NonAsciiError>
[src][−]
Create a Utf8Char
from a single byte.
The byte must be an ASCII character.
Errors
Returns NonAsciiError
if the byte is greater than 127.
Examples
assert_eq!(Utf8Char::from_ascii(b'a').unwrap(), 'a'); assert!(Utf8Char::from_ascii(128).is_err());
pub unsafe fn from_ascii_unchecked(ascii: u8) -> Self
[src][−]
Create a Utf8Char
from a single byte without checking that it’s a
valid codepoint on its own, which is only true for ASCII characters.
Safety
The byte must be less than 128.
pub fn len(self) -> usize
[src][−]
The number of bytes this character needs.
Is between 1 and 4 (inclusive) and identical to .as_ref().len()
or
.as_char().len_utf8()
.
pub fn is_ascii(&self) -> bool
[src][−]
Checks that the codepoint is an ASCII character.
pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool
[src][−]
Checks that two characters are an ASCII case-insensitive match.
Is equivalent to a.to_ascii_lowercase() == b.to_ascii_lowercase()
.
pub fn to_ascii_uppercase(&self) -> Self
[src][−]
Converts the character to its ASCII upper case equivalent.
ASCII letters ‘a’ to ‘z’ are mapped to ‘A’ to ‘Z’, but non-ASCII letters are unchanged.
pub fn to_ascii_lowercase(&self) -> Self
[src][−]
Converts the character to its ASCII lower case equivalent.
ASCII letters ‘A’ to ‘Z’ are mapped to ‘a’ to ‘z’, but non-ASCII letters are unchanged.
pub fn make_ascii_uppercase(&mut self)
[src][−]
Converts the character to its ASCII upper case equivalent in-place.
ASCII letters ‘a’ to ‘z’ are mapped to ‘A’ to ‘Z’, but non-ASCII letters are unchanged.
pub fn make_ascii_lowercase(&mut self)
[src][−]
Converts the character to its ASCII lower case equivalent in-place.
ASCII letters ‘A’ to ‘Z’ are mapped to ‘a’ to ‘z’, but non-ASCII letters are unchanged.
pub fn to_char(self) -> char
[src][−]
Convert from UTF-8 to UTF-32.
pub fn to_slice(self, dst: &mut [u8]) -> usize
[src][−]
Write the internal representation to a slice, and then return the number of bytes written.
Panics
Will panic if the buffer is too small.
You can get the required length from .len()
,
but a buffer of length four is always large enough.
pub fn to_array(self) -> ([u8; 4], usize)
[src][−]
Expose the internal array and the number of used bytes.
pub fn as_str(&self) -> &str
[src][−]
Return a str
view of the array the codepoint is stored as.
Is an unambiguous version of .as_ref()
.
Trait Implementations
impl AsRef<[u8]> for Utf8Char
[src][+]
impl AsRef<str> for Utf8Char
[src][+]
impl AsciiExt for Utf8Char
[src][+]
impl Borrow<[u8]> for Utf8Char
[src][+]
impl Borrow<str> for Utf8Char
[src][+]
impl Clone for Utf8Char
[src][+]
impl Copy for Utf8Char
[src]
impl Debug for Utf8Char
[src][+]
impl Default for Utf8Char
[src][+]
impl Deref for Utf8Char
[src][+]
impl Display for Utf8Char
[src][+]
impl Eq for Utf8Char
[src]
impl<'a> Extend<&'a Utf8Char> for Vec<u8>
[src][+]
impl<'a> Extend<&'a Utf8Char> for String
[src][+]
impl Extend<Utf8Char> for Vec<u8>
[src][+]
impl Extend<Utf8Char> for String
[src][+]
impl From<Utf16Char> for Utf8Char
[src][+]
impl From<Utf8Char> for char
[src][+]
impl From<Utf8Char> for Utf8Iterator
[src][+]
impl From<Utf8Char> for Utf16Char
[src][+]
impl From<char> for Utf8Char
[src][+]
impl<'a> FromIterator<&'a Utf8Char> for String
[src][+]
impl<'a> FromIterator<&'a Utf8Char> for Vec<u8>
[src][+]
impl FromIterator<Utf8Char> for String
[src][+]
impl FromIterator<Utf8Char> for Vec<u8>
[src][+]
impl FromStr for Utf8Char
[src][+]
impl Hash for Utf8Char
[src][+]
impl IntoIterator for Utf8Char
[src][+]
impl Ord for Utf8Char
[src][+]
impl PartialEq<Utf16Char> for Utf8Char
[src][+]
impl PartialEq<Utf8Char> for Utf8Char
[src][+]
impl PartialEq<Utf8Char> for char
[src][+]
impl PartialEq<Utf8Char> for Utf16Char
[src][+]
impl PartialEq<char> for Utf8Char
[src][+]
impl PartialEq<u8> for Utf8Char
[src][+]
impl PartialOrd<Utf16Char> for Utf8Char
[src][+]
impl PartialOrd<Utf8Char> for Utf8Char
[src][+]
impl PartialOrd<Utf8Char> for char
[src][+]
impl PartialOrd<Utf8Char> for Utf16Char
[src][+]
impl PartialOrd<char> for Utf8Char
[src][+]
impl StructuralEq for Utf8Char
[src]
impl StructuralPartialEq for Utf8Char
[src]
Auto Trait Implementations
impl RefUnwindSafe for Utf8Char
impl Send for Utf8Char
impl Sync for Utf8Char
impl Unpin for Utf8Char
impl UnwindSafe for Utf8Char
Blanket Implementations
impl<T> Any for T where
T: 'static + ?Sized,
[src][+]
T: 'static + ?Sized,
impl<T> Borrow<T> for T where
T: ?Sized,
[src][+]
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
[src][+]
T: ?Sized,
impl<T> From<T> for T
[src][+]
impl<T, U> Into<U> for T where
U: From<T>,
[src][+]
U: From<T>,
impl<T> ToOwned for T where
T: Clone,
[src][+]
T: Clone,
impl<T> ToString for T where
T: Display + ?Sized,
[src][+]
T: Display + ?Sized,
impl<T, U> TryFrom<U> for T where
U: Into<T>,
[src][+]
U: Into<T>,
impl<T, U> TryInto<U> for T where
U: TryFrom<T>,
[src][+]
U: TryFrom<T>,