Trait encode_unicode::IterExt [−][src]
Iterator methods that convert between u8s and Utf8Char, or between u16s and Utf16Char.
All the iterator adapters also accept iterators that produce references of the type they convert from.
Required methods
fn to_bytes(self) -> Utf8CharSplitter<Self::Item, Self>ⓘNotable traits for Utf8CharSplitter<U, I>
impl<U: Borrow<Utf8Char>, I: Iterator<Item = U>> Iterator for Utf8CharSplitter<U, I> type Item = u8;impl<U: Borrow<Utf8Char>, I: Iterator<Item = U>> Read for Utf8CharSplitter<U, I>
where
Self::Item: Borrow<Utf8Char>,
[src]
Notable traits for Utf8CharSplitter<U, I>
impl<U: Borrow<Utf8Char>, I: Iterator<Item = U>> Iterator for Utf8CharSplitter<U, I> type Item = u8;impl<U: Borrow<Utf8Char>, I: Iterator<Item = U>> Read for Utf8CharSplitter<U, I>
Self::Item: Borrow<Utf8Char>,
Converts an iterator of Utf8Chars or &Utf8Chars to an iterator of u8s.
Has the same effect as .flat_map() or .flatten(), but the returned iterator is ~40% faster.
The iterator also implements Read (when the std feature isn’t disabled).
Reading will never produce an error, and calls to .read() and .next() can be mixed.
The exact number of bytes cannot be known in advance, but size_hint()
gives the possible range.
(min: all remaining characters are ASCII, max: all require four bytes)
Examples
From iterator of values:
use encode_unicode::{IterExt, StrExt}; let iterator = "foo".utf8chars(); let mut bytes = [0; 4]; for (u,dst) in iterator.to_bytes().zip(&mut bytes) {*dst=u;} assert_eq!(&bytes, b"foo\0");
From iterator of references:
use encode_unicode::{IterExt, StrExt, Utf8Char}; let chars: Vec<Utf8Char> = "💣 bomb 💣".utf8chars().collect(); let bytes: Vec<u8> = chars.iter().to_bytes().collect(); let flat_map: Vec<u8> = chars.iter().flat_map(|u8c| *u8c ).collect(); assert_eq!(bytes, flat_map);
Reading from it:
use encode_unicode::{IterExt, StrExt}; use std::io::Read; let s = "Ååh‽"; assert_eq!(s.len(), 8); let mut buf = [b'E'; 9]; let mut reader = s.utf8chars().to_bytes(); assert_eq!(reader.read(&mut buf[..]).unwrap(), 8); assert_eq!(reader.read(&mut buf[..]).unwrap(), 0); assert_eq!(&buf[..8], s.as_bytes()); assert_eq!(buf[8], b'E');
fn to_units(self) -> Utf16CharSplitter<Self::Item, Self>ⓘNotable traits for Utf16CharSplitter<U, I>
impl<U: Borrow<Utf16Char>, I: Iterator<Item = U>> Iterator for Utf16CharSplitter<U, I> type Item = u16;
where
Self::Item: Borrow<Utf16Char>,
[src]
Notable traits for Utf16CharSplitter<U, I>
impl<U: Borrow<Utf16Char>, I: Iterator<Item = U>> Iterator for Utf16CharSplitter<U, I> type Item = u16;
Self::Item: Borrow<Utf16Char>,
Converts an iterator of Utf16Chars (or &Utf16Chars) to an iterator of u16s.
Has the same effect as .flat_map() or .flatten(), but the returned iterator is about twice as fast.
The exact number of units cannot be known in advance, but size_hint()
gives the possible range.
Examples
From iterator of values:
use encode_unicode::{IterExt, StrExt}; let iterator = "foo".utf16chars(); let mut units = [0; 4]; for (u,dst) in iterator.to_units().zip(&mut units) {*dst=u;} assert_eq!(units, ['f' as u16, 'o' as u16, 'o' as u16, 0]);
From iterator of references:
use encode_unicode::{IterExt, StrExt, Utf16Char};
// (💣 takes two units)
let chars: Vec<Utf16Char> = "💣 bomb 💣".utf16chars().collect();
let units: Vec<u16> = chars.iter().to_units().collect();
let flat_map: Vec<u16> = chars.iter().flat_map(|u16c| *u16c ).collect();
assert_eq!(units, flat_map);
fn to_utf8chars(self) -> Utf8CharMerger<Self::Item, Self>ⓘNotable traits for Utf8CharMerger<B, I>
impl<B: Borrow<u8>, I: Iterator<Item = B>> Iterator for Utf8CharMerger<B, I> type Item = Result<Utf8Char, InvalidUtf8Slice>;
where
Self::Item: Borrow<u8>,
[src]
Notable traits for Utf8CharMerger<B, I>
impl<B: Borrow<u8>, I: Iterator<Item = B>> Iterator for Utf8CharMerger<B, I> type Item = Result<Utf8Char, InvalidUtf8Slice>;
Self::Item: Borrow<u8>,
Decodes bytes as UTF-8 and groups them into Utf8Chars.
When errors (invalid values or sequences) are encountered,
it continues with the byte right after the start of the error sequence.
This is neither the most intelligent choice (sometimes it is guaranteed to
produce another error), nor the easiest to implement, but I believe it to
be the most predictable.
It also means that ASCII characters are never hidden by errors.
Examples
Replace all errors with U+FFFD REPLACEMENT CHARACTER:
use encode_unicode::{Utf8Char, IterExt}; let mut buf = [b'\0'; 255]; let len = b"foo\xCFbar".iter() .to_utf8chars() .flat_map(|r| r.unwrap_or(Utf8Char::from('\u{FFFD}')).into_iter() ) .zip(&mut buf[..]) .map(|(byte, dst)| *dst = byte ) .count(); assert_eq!(&buf[..len], "foo\u{FFFD}bar".as_bytes());
Collect everything up until the first error into a string:
use encode_unicode::iterator::Utf8CharMerger; let mut good = String::new(); for r in Utf8CharMerger::from(b"foo\xcc\xbbbar\xcc\xddbaz") { if let Ok(uc) = r { good.push_str(uc.as_str()); } else { break; } } assert_eq!(good, "foo̻bar");
Abort decoding on error:
use encode_unicode::{IterExt, Utf8Char}; use encode_unicode::error::{InvalidUtf8Slice, InvalidUtf8}; let result = b"ab\0\xe0\xbc\xa9 \xf3\x80\x77".iter() .to_utf8chars() .collect::<Result<String,InvalidUtf8Slice>>(); assert_eq!(result, Err(InvalidUtf8Slice::Utf8(InvalidUtf8::NotAContinuationByte(2))));
fn to_utf16chars(self) -> Utf16CharMerger<Self::Item, Self>ⓘNotable traits for Utf16CharMerger<B, I>
impl<B: Borrow<u16>, I: Iterator<Item = B>> Iterator for Utf16CharMerger<B, I> type Item = Result<Utf16Char, Utf16PairError>;
where
Self::Item: Borrow<u16>,
[src]
Notable traits for Utf16CharMerger<B, I>
impl<B: Borrow<u16>, I: Iterator<Item = B>> Iterator for Utf16CharMerger<B, I> type Item = Result<Utf16Char, Utf16PairError>;
Self::Item: Borrow<u16>,
Decodes u16 units as UTF-16 and groups them into Utf16Chars.
When errors (unmatched leading surrogates or unexpected trailing surrogates) are encountered, an error is produced for every unit.
Examples
Replace errors with ‘�’:
use encode_unicode::{IterExt, Utf16Char};
let slice = &['a' as u16, 0xdf00, 0xd83c, 0xdca0][..];
let string = slice.iter()
    .to_utf16chars()
    .map(|r| r.unwrap_or(Utf16Char::from('\u{fffd}')) ) // REPLACEMENT_CHARACTER
    .collect::<String>();
assert_eq!(string, "a�🂠");
use encode_unicode::{IterExt, Utf16Char}; use encode_unicode::error::Utf16PairError::*; let slice = [0xdcba, 0xdeff, 0xd8be, 0xdeee, 'Y' as u16, 0xdab1, 0xdab1]; let mut iter = slice.iter().to_utf16chars(); assert_eq!(iter.size_hint(), (3, Some(7))); assert_eq!(iter.next(), Some(Err(UnexpectedTrailingSurrogate))); assert_eq!(iter.next(), Some(Err(UnexpectedTrailingSurrogate))); assert_eq!(iter.next(), Some(Ok(Utf16Char::from('\u{3faee}')))); assert_eq!(iter.next(), Some(Ok(Utf16Char::from('Y')))); assert_eq!(iter.next(), Some(Err(UnmatchedLeadingSurrogate))); assert_eq!(iter.next(), Some(Err(Incomplete))); assert_eq!(iter.into_remaining_units().next(), None);
Search for a codepoint and return the codepoint index of the first match:
use encode_unicode::{IterExt, Utf16Char}; let position = [0xd875, 0xdd4f, '≈' as u16, '2' as u16].iter() .to_utf16chars() .position(|r| r == Ok(Utf16Char::from('≈')) ); assert_eq!(position, Some(1));
Implementors
impl<I: Iterator> IterExt for I
[src]
fn to_bytes(self) -> Utf8CharSplitter<Self::Item, Self>ⓘNotable traits for Utf8CharSplitter<U, I>
impl<U: Borrow<Utf8Char>, I: Iterator<Item = U>> Iterator for Utf8CharSplitter<U, I> type Item = u8;impl<U: Borrow<Utf8Char>, I: Iterator<Item = U>> Read for Utf8CharSplitter<U, I>
where
Self::Item: Borrow<Utf8Char>,
[src]
Notable traits for Utf8CharSplitter<U, I>
impl<U: Borrow<Utf8Char>, I: Iterator<Item = U>> Iterator for Utf8CharSplitter<U, I> type Item = u8;impl<U: Borrow<Utf8Char>, I: Iterator<Item = U>> Read for Utf8CharSplitter<U, I>
Self::Item: Borrow<Utf8Char>,
fn to_units(self) -> Utf16CharSplitter<Self::Item, Self>ⓘNotable traits for Utf16CharSplitter<U, I>
impl<U: Borrow<Utf16Char>, I: Iterator<Item = U>> Iterator for Utf16CharSplitter<U, I> type Item = u16;
where
Self::Item: Borrow<Utf16Char>,
[src]
Notable traits for Utf16CharSplitter<U, I>
impl<U: Borrow<Utf16Char>, I: Iterator<Item = U>> Iterator for Utf16CharSplitter<U, I> type Item = u16;
Self::Item: Borrow<Utf16Char>,
fn to_utf8chars(self) -> Utf8CharMerger<Self::Item, Self>ⓘNotable traits for Utf8CharMerger<B, I>
impl<B: Borrow<u8>, I: Iterator<Item = B>> Iterator for Utf8CharMerger<B, I> type Item = Result<Utf8Char, InvalidUtf8Slice>;
where
Self::Item: Borrow<u8>,
[src]
Notable traits for Utf8CharMerger<B, I>
impl<B: Borrow<u8>, I: Iterator<Item = B>> Iterator for Utf8CharMerger<B, I> type Item = Result<Utf8Char, InvalidUtf8Slice>;
Self::Item: Borrow<u8>,
fn to_utf16chars(self) -> Utf16CharMerger<Self::Item, Self>ⓘNotable traits for Utf16CharMerger<B, I>
impl<B: Borrow<u16>, I: Iterator<Item = B>> Iterator for Utf16CharMerger<B, I> type Item = Result<Utf16Char, Utf16PairError>;
where
Self::Item: Borrow<u16>,
[src]
Notable traits for Utf16CharMerger<B, I>
impl<B: Borrow<u16>, I: Iterator<Item = B>> Iterator for Utf16CharMerger<B, I> type Item = Result<Utf16Char, Utf16PairError>;
Self::Item: Borrow<u16>,