Struct encode_unicode::Utf8Char[][src]

pub struct Utf8Char { /* fields omitted */ }

A Unicode codepoint stored as UTF-8.

It can be borrowed as a str, and has the same size as char.

Implementations

impl Utf8Char[src]

pub fn from_str_start(src: &str) -> Result<(Self, usize), EmptyStrError>[src]

Create a Utf8Char from the first codepoint in a str.

Returns an error if the str is empty.

Examples

use encode_unicode::Utf8Char;

assert_eq!(Utf8Char::from_str_start("a"), Ok((Utf8Char::from('a'),1)));
assert_eq!(Utf8Char::from_str_start("ab"), Ok((Utf8Char::from('a'),1)));
assert_eq!(Utf8Char::from_str_start("🂠 "), Ok((Utf8Char::from('🂠'),4)));
assert_eq!(Utf8Char::from_str_start("é"), Ok((Utf8Char::from('e'),1)));// 'e'+u301 combining mark
assert!(Utf8Char::from_str_start("").is_err());

pub fn from_slice_start(src: &[u8]) -> Result<(Self, usize), InvalidUtf8Slice>[src]

Create a Utf8Char from the first codepoint in a UTF-8 slice.
Also returns the length of the UTF-8 sequence for the codepoint.

If the slice is from a str, use ::from_str_start() to skip UTF-8 validation.

Errors

Returns an Err if the slice is empty, doesn’t start with a valid UTF-8 sequence or is too short for the sequence.

Examples

use encode_unicode::Utf8Char;
use encode_unicode::error::InvalidUtf8Slice::*;
use encode_unicode::error::InvalidUtf8::*;

assert_eq!(Utf8Char::from_slice_start(&[b'A', b'B', b'C']), Ok((Utf8Char::from('A'),1)));
assert_eq!(Utf8Char::from_slice_start(&[0xdd, 0xbb]), Ok((Utf8Char::from('\u{77b}'),2)));

assert_eq!(Utf8Char::from_slice_start(&[]), Err(TooShort(1)));
assert_eq!(Utf8Char::from_slice_start(&[0xf0, 0x99]), Err(TooShort(4)));
assert_eq!(Utf8Char::from_slice_start(&[0xee, b'F', 0x80]), Err(Utf8(NotAContinuationByte(1))));
assert_eq!(Utf8Char::from_slice_start(&[0xee, 0x99, 0x0f]), Err(Utf8(NotAContinuationByte(2))));

pub unsafe fn from_slice_start_unchecked(src: &[u8]) -> (Self, usize)[src]

A from_slice_start() that doesn’t validate the codepoint.

Safety

The slice must be non-empty and start with a valid UTF-8 codepoint.
Invalid or incomplete values might cause reads of uninitialized memory.

pub fn from_array(utf8: [u8; 4]) -> Result<Self, InvalidUtf8Array>[src]

Create a Utf8Char from a byte array after validating it.

The codepoint must start at the first byte.
Unused bytes are set to zero by this function, so they may hold any value on input.

Errors

Returns an Err if the array doesn’t start with a valid UTF-8 sequence.

Examples

use encode_unicode::Utf8Char;
use encode_unicode::error::InvalidUtf8Array::*;
use encode_unicode::error::InvalidUtf8::*;
use encode_unicode::error::InvalidCodepoint::*;

assert_eq!(Utf8Char::from_array([b'A', 0, 0, 0]), Ok(Utf8Char::from('A')));
assert_eq!(Utf8Char::from_array([0xf4, 0x8b, 0xbb, 0xbb]), Ok(Utf8Char::from('\u{10befb}')));
assert_eq!(Utf8Char::from_array([b'A', b'B', b'C', b'D']), Ok(Utf8Char::from('A')));
assert_eq!(Utf8Char::from_array([0, 0, 0xcc, 0xbb]), Ok(Utf8Char::from('\0')));

assert_eq!(Utf8Char::from_array([0xef, b'F', 0x80, 0x80]), Err(Utf8(NotAContinuationByte(1))));
assert_eq!(Utf8Char::from_array([0xc1, 0x80, 0, 0]), Err(Utf8(OverLong)));
assert_eq!(Utf8Char::from_array([0xf7, 0xaa, 0x99, 0x88]), Err(Codepoint(TooHigh)));

pub unsafe fn from_array_unchecked(utf8: [u8; 4]) -> Self[src]

Zero-cost constructor.

Safety

Must contain a valid codepoint starting at the first byte, with the unused bytes zeroed.
Bad values can easily lead to undefined behavior.

pub fn from_ascii(ascii: u8) -> Result<Self, NonAsciiError>[src]

Create a Utf8Char from a single byte.

The byte must be an ASCII character.

Errors

Returns NonAsciiError if the byte is greater than 127.

Examples

assert_eq!(Utf8Char::from_ascii(b'a').unwrap(), 'a');
assert!(Utf8Char::from_ascii(128).is_err());

pub unsafe fn from_ascii_unchecked(ascii: u8) -> Self[src]

Create a Utf8Char from a single byte without checking that it’s a valid codepoint on its own, which is only true for ASCII characters.

Safety

The byte must be less than 128.

pub fn len(self) -> usize[src]

The number of bytes this character needs.

Is between 1 and 4 (inclusive) and identical to .as_ref().len() or .to_char().len_utf8().

pub fn is_ascii(&self) -> bool[src]

Checks that the codepoint is an ASCII character.

pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool[src]

Checks that two characters are an ASCII case-insensitive match.

Is equivalent to a.to_ascii_lowercase() == b.to_ascii_lowercase().

pub fn to_ascii_uppercase(&self) -> Self[src]

Converts the character to its ASCII upper case equivalent.

ASCII letters ‘a’ to ‘z’ are mapped to ‘A’ to ‘Z’, but non-ASCII letters are unchanged.

pub fn to_ascii_lowercase(&self) -> Self[src]

Converts the character to its ASCII lower case equivalent.

ASCII letters ‘A’ to ‘Z’ are mapped to ‘a’ to ‘z’, but non-ASCII letters are unchanged.

pub fn make_ascii_uppercase(&mut self)[src]

Converts the character to its ASCII upper case equivalent in-place.

ASCII letters ‘a’ to ‘z’ are mapped to ‘A’ to ‘Z’, but non-ASCII letters are unchanged.

pub fn make_ascii_lowercase(&mut self)[src]

Converts the character to its ASCII lower case equivalent in-place.

ASCII letters ‘A’ to ‘Z’ are mapped to ‘a’ to ‘z’, but non-ASCII letters are unchanged.

pub fn to_char(self) -> char[src]

Convert from UTF-8 to UTF-32

pub fn to_slice(self, dst: &mut [u8]) -> usize[src]

Writes the internal representation to a slice and returns the number of bytes written.

Panics

Will panic if the buffer is too small. You can get the required length from .len(), but a buffer of length four is always large enough.

pub fn to_array(self) -> ([u8; 4], usize)[src]

Expose the internal array and the number of used bytes.

pub fn as_str(&self) -> &str[src]

Return a str view of the array the codepoint is stored as.

Is an unambiguous version of .as_ref().

Trait Implementations

impl AsRef<[u8]> for Utf8Char[src]

impl AsRef<str> for Utf8Char[src]

impl AsciiExt for Utf8Char[src]

type Owned = Utf8Char

👎 Deprecated since 1.26.0:

use inherent methods instead

Container type for copied ASCII characters.

impl Borrow<[u8]> for Utf8Char[src]

impl Borrow<str> for Utf8Char[src]

impl Clone for Utf8Char[src]

impl Copy for Utf8Char[src]

impl Debug for Utf8Char[src]

impl Default for Utf8Char[src]

impl Deref for Utf8Char[src]

type Target = str

The resulting type after dereferencing.

impl Display for Utf8Char[src]

impl Eq for Utf8Char[src]

impl<'a> Extend<&'a Utf8Char> for Vec<u8>[src]

impl<'a> Extend<&'a Utf8Char> for String[src]

impl Extend<Utf8Char> for Vec<u8>[src]

impl Extend<Utf8Char> for String[src]

impl From<Utf16Char> for Utf8Char[src]

impl From<Utf8Char> for char[src]

impl From<Utf8Char> for Utf8Iterator[src]

impl From<Utf8Char> for Utf16Char[src]

impl From<char> for Utf8Char[src]

impl<'a> FromIterator<&'a Utf8Char> for String[src]

impl<'a> FromIterator<&'a Utf8Char> for Vec<u8>[src]

impl FromIterator<Utf8Char> for String[src]

impl FromIterator<Utf8Char> for Vec<u8>[src]

impl FromStr for Utf8Char[src]

type Err = FromStrError

The associated error which can be returned from parsing.

fn from_str(s: &str) -> Result<Self, FromStrError>[src]

Create a Utf8Char from a string slice. The string must contain exactly one codepoint.

Examples

use encode_unicode::error::FromStrError::*;
use encode_unicode::Utf8Char;
use std::str::FromStr;

assert_eq!(Utf8Char::from_str("a"), Ok(Utf8Char::from('a')));
assert_eq!(Utf8Char::from_str("🂠"), Ok(Utf8Char::from('🂠')));
assert_eq!(Utf8Char::from_str(""), Err(Empty));
assert_eq!(Utf8Char::from_str("ab"), Err(MultipleCodepoints));
assert_eq!(Utf8Char::from_str("é"), Err(MultipleCodepoints));// 'e'+u301 combining mark

impl Hash for Utf8Char[src]

impl IntoIterator for Utf8Char[src]

type Item = u8

The type of the elements being iterated over.

type IntoIter = Utf8Iterator

Which kind of iterator are we turning this into?

fn into_iter(self) -> Utf8Iterator

Notable traits for Utf8Iterator

impl Iterator for Utf8Iterator
    type Item = u8;
impl Read for Utf8Iterator
[src]

Iterate over the byte values.

impl Ord for Utf8Char[src]

impl PartialEq<Utf16Char> for Utf8Char[src]

impl PartialEq<Utf8Char> for Utf8Char[src]

impl PartialEq<Utf8Char> for char[src]

impl PartialEq<Utf8Char> for Utf16Char[src]

impl PartialEq<char> for Utf8Char[src]

impl PartialEq<u8> for Utf8Char[src]

Only considers the byte equal if both it and the Utf8Char represent ASCII characters.

There is no impl in the opposite direction, as this should only be used to compare Utf8Chars against constants.

Examples

assert!(Utf8Char::from('8') == b'8');
assert!(Utf8Char::from_array([0xf1,0x80,0x80,0x80]).unwrap() != 0xf1);
assert!(Utf8Char::from('\u{ff}') != 0xff);
assert!(Utf8Char::from('\u{80}') != 0x80);

impl PartialOrd<Utf16Char> for Utf8Char[src]

impl PartialOrd<Utf8Char> for Utf8Char[src]

impl PartialOrd<Utf8Char> for char[src]

impl PartialOrd<Utf8Char> for Utf16Char[src]

impl PartialOrd<char> for Utf8Char[src]

impl StructuralEq for Utf8Char[src]

impl StructuralPartialEq for Utf8Char[src]

Auto Trait Implementations

impl RefUnwindSafe for Utf8Char

impl Send for Utf8Char

impl Sync for Utf8Char

impl Unpin for Utf8Char

impl UnwindSafe for Utf8Char

Blanket Implementations

impl<T> Any for T where
    T: 'static + ?Sized
[src]

impl<T> Borrow<T> for T where
    T: ?Sized
[src]

impl<T> BorrowMut<T> for T where
    T: ?Sized
[src]

impl<T> From<T> for T[src]

impl<T, U> Into<U> for T where
    U: From<T>, 
[src]

impl<T> ToOwned for T where
    T: Clone
[src]

type Owned = T

The resulting type after obtaining ownership.

impl<T> ToString for T where
    T: Display + ?Sized
[src]

impl<T, U> TryFrom<U> for T where
    U: Into<T>, 
[src]

type Error = Infallible

The type returned in the event of a conversion error.

impl<T, U> TryInto<U> for T where
    U: TryFrom<T>, 
[src]

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.