formatting_nostd/
utf8.rs
/// U+FFFD REPLACEMENT CHARACTER as a one-character string.
///
/// Written as an escape rather than the raw glyph so the literal cannot be
/// mistaken for (or silently turned into) an encoding-corruption artifact
/// when this file is viewed or re-encoded.
const UNICODE_REPLACEMENT_CHAR_STR: &str = "\u{FFFD}";
2
/// Splits `buf` into its first UTF-8 encoded character and the bytes after it.
///
/// Prefixes of 1, 2, 3 and 4 bytes (never more than `buf` holds) are tried in
/// order; the first one that is valid UTF-8 is returned as a `&str` together
/// with the remainder of `buf`. Returns `None` when none of those prefixes is
/// valid UTF-8.
///
/// # Panics
///
/// Panics if `buf` is empty.
pub fn split_at_first_char(buf: &[u8]) -> Option<(&str, &[u8])> {
    assert!(!buf.is_empty());

    // A single UTF-8 encoded character occupies at most four bytes.
    let longest = buf.len().min(4);

    (1..=longest).find_map(|len| {
        let (head, tail) = buf.split_at(len);
        core::str::from_utf8(head).ok().map(|ch| (ch, tail))
    })
}
26
#[cfg(test)]
#[test]
fn test_split_at_first_char() {
    // Compares the result of `split_at_first_char(input)` with `expected`.
    fn check(input: &[u8], expected: Option<(&str, &[u8])>) {
        assert_eq!(split_at_first_char(input), expected);
    }

    // A leading ASCII byte is split off as a one-byte character.
    check(&[b'1', b'2', b'3'], Some(("1", &[b'2', b'3'][..])));
    check(&[b'1', b'2'], Some(("1", &[b'2'][..])));
    check(&[b'1'], Some(("1", &[][..])));

    // 0x80 cannot begin a valid character, whatever follows it.
    check(&[0x80, b'2', b'3'], None);
    check(&[0x80, b'2'], None);
    check(&[0x80], None);

    // Two-, three- and four-byte encodings, each followed by a zero byte.
    check(&[0xc2, 0xa1, 0], Some(("¡", &[0][..])));
    check(&[0xe0, 0xa4, 0xb9, 0], Some(("ह", &[0][..])));
    check(&[0xf0, 0x90, 0x8d, 0x88, 0], Some(("𐍈", &[0][..])));
}
60
61pub fn split_at_first_char_lossy(mut buf: &[u8]) -> (&str, &[u8]) {
79 assert!(!buf.is_empty());
80 let mut invalid_seq = false;
81 loop {
82 let res = split_at_first_char(buf);
83 if let Some((first_char, therest)) = res {
84 return if invalid_seq {
85 (UNICODE_REPLACEMENT_CHAR_STR, buf)
87 } else {
88 (first_char, therest)
89 };
90 }
91 invalid_seq = true;
93
94 buf = &buf[1..];
96
97 if buf.is_empty() {
98 return (UNICODE_REPLACEMENT_CHAR_STR, buf);
99 }
100 }
101}
102
#[cfg(test)]
#[test]
fn test_split_at_first_char_lossy() {
    // Every case below leaves the same two trailing bytes unconsumed.
    let tail: &[u8] = &[2, 3];
    let replacement = UNICODE_REPLACEMENT_CHAR_STR;

    // Valid leading character: returned unchanged.
    assert_eq!(split_at_first_char_lossy(&[b'1', 2, 3]), ("1", tail));

    // One invalid byte becomes one replacement character.
    assert_eq!(
        split_at_first_char_lossy(&[0x80, 2, 3]),
        (replacement, tail)
    );

    // Two consecutive invalid bytes still yield one replacement character.
    assert_eq!(
        split_at_first_char_lossy(&[0x80, 0x80, 2, 3]),
        (replacement, tail)
    );
}
121
/// Iterator state for lossy UTF-8 decoding of a byte slice.
///
/// Holds the bytes that have not been decoded yet.
#[derive(Debug, Clone)]
pub struct DecodeLossyIterator<'a> {
    /// Input that is still waiting to be decoded.
    bytes: &'a [u8],
}
125
126impl<'a> core::iter::Iterator for DecodeLossyIterator<'a> {
127 type Item = &'a str;
128
129 fn next(&mut self) -> Option<Self::Item> {
130 if self.bytes.is_empty() {
131 return None;
132 }
133 let (item, next_bytes) = split_at_first_char_lossy(self.bytes);
134 self.bytes = next_bytes;
135 Some(item)
136 }
137}
138
139pub fn decode_lossy(bytes: &[u8]) -> DecodeLossyIterator {
140 DecodeLossyIterator { bytes }
141}
142
#[cfg(test)]
#[test]
fn test_lossy_decode_iterator() {
    // Pure ASCII input: one item per character.
    let ascii: Vec<&str> = decode_lossy("123".as_bytes()).collect();
    assert_eq!(ascii, vec!["1", "2", "3"]);

    // Each run of invalid bytes collapses into one replacement character,
    // both before and after a valid character.
    let mixed_input: [u8; 4] = [0x80, 0x80, b'x', 0x80];
    let mixed: Vec<&str> = decode_lossy(&mixed_input).collect();
    assert_eq!(
        mixed,
        vec![
            UNICODE_REPLACEMENT_CHAR_STR,
            "x",
            UNICODE_REPLACEMENT_CHAR_STR
        ]
    );
}