toml_edit/parser/
trivia.rs

1use std::ops::RangeInclusive;
2
3use winnow::combinator::alt;
4use winnow::combinator::empty;
5use winnow::combinator::eof;
6use winnow::combinator::fail;
7use winnow::combinator::opt;
8use winnow::combinator::peek;
9use winnow::combinator::repeat;
10use winnow::combinator::terminated;
11use winnow::prelude::*;
12use winnow::stream::Stream as _;
13use winnow::token::any;
14use winnow::token::one_of;
15use winnow::token::take_while;
16
17use crate::parser::prelude::*;
18
/// Reinterprets `bytes` as a `&str`, skipping UTF-8 validation in optimized builds.
///
/// When `debug_assertions` are enabled the bytes are validated anyway and a
/// failure panics with `safety_justification` as the message, so a broken
/// invariant is caught during testing instead of becoming undefined behavior.
///
/// # Safety
///
/// `bytes` must be valid UTF-8; `safety_justification` should state why the
/// caller knows that to be true.
pub(crate) unsafe fn from_utf8_unchecked<'b>(
    bytes: &'b [u8],
    safety_justification: &'static str,
) -> &'b str {
    // Catch problems more quickly when testing
    if cfg!(debug_assertions) {
        return std::str::from_utf8(bytes).expect(safety_justification);
    }

    // SAFETY: the caller promises that `bytes` is valid UTF-8 (see `# Safety`).
    unsafe { std::str::from_utf8_unchecked(bytes) }
}
32
// wschar = ( %x20 /              ; Space
//            %x09 )              ; Horizontal tab
/// The two bytes that count as in-line whitespace: space and horizontal tab.
pub(crate) const WSCHAR: (u8, u8) = (0x20, 0x09);
36
37// ws = *wschar
38pub(crate) fn ws<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
39    take_while(0.., WSCHAR)
40        .map(|b| unsafe { from_utf8_unchecked(b, "`is_wschar` filters out on-ASCII") })
41        .parse_next(input)
42}
43
// non-ascii = %x80-D7FF / %xE000-10FFFF
// - ASCII is 0xxxxxxx
// - First byte for UTF-8 is 11xxxxxx
// - Subsequent UTF-8 bytes are 10xxxxxx
/// Every byte with the high bit set, i.e. any byte of a multi-byte UTF-8 sequence.
pub(crate) const NON_ASCII: RangeInclusive<u8> = 0x80..=u8::MAX;

// non-eol = %x09 / %x20-7E / non-ascii
/// Bytes allowed inside a comment: tab, printable ASCII, and `NON_ASCII`.
pub(crate) const NON_EOL: (u8, RangeInclusive<u8>, RangeInclusive<u8>) =
    (b'\t', b' '..=b'~', NON_ASCII);

// comment-start-symbol = %x23 ; #
/// The byte that introduces a comment.
pub(crate) const COMMENT_START_SYMBOL: u8 = 0x23;
56
57// comment = comment-start-symbol *non-eol
58pub(crate) fn comment(input: &mut Input<'_>) -> PResult<()> {
59    (COMMENT_START_SYMBOL, take_while(0.., NON_EOL))
60        .void()
61        .parse_next(input)
62}
63
64// newline = ( %x0A /              ; LF
65//             %x0D.0A )           ; CRLF
66pub(crate) fn newline(input: &mut Input<'_>) -> PResult<()> {
67    dispatch! {any;
68        b'\n' => empty,
69        b'\r' => one_of(LF).void(),
70        _ => fail,
71    }
72    .parse_next(input)
73}
/// Line feed (`%x0A`).
pub(crate) const LF: u8 = 0x0A;
/// Carriage return (`%x0D`).
pub(crate) const CR: u8 = 0x0D;
76
77// ws-newline       = *( wschar / newline )
78pub(crate) fn ws_newline(input: &mut Input<'_>) -> PResult<()> {
79    repeat(
80        0..,
81        alt((newline.value(&b"\n"[..]), take_while(1.., WSCHAR))),
82    )
83    .map(|()| ())
84    .parse_next(input)
85}
86
87// ws-newlines      = newline *( wschar / newline )
88pub(crate) fn ws_newlines(input: &mut Input<'_>) -> PResult<()> {
89    (newline, ws_newline).void().parse_next(input)
90}
91
92// note: this rule is not present in the original grammar
93// ws-comment-newline = *( ws-newline-nonempty / comment )
94pub(crate) fn ws_comment_newline(input: &mut Input<'_>) -> PResult<()> {
95    let mut start = input.checkpoint();
96    loop {
97        let _ = ws.parse_next(input)?;
98
99        let next_token = opt(peek(any)).parse_next(input)?;
100        match next_token {
101            Some(b'#') => (comment, newline).void().parse_next(input)?,
102            Some(b'\n') => (newline).void().parse_next(input)?,
103            Some(b'\r') => (newline).void().parse_next(input)?,
104            _ => break,
105        }
106
107        let end = input.checkpoint();
108        if start == end {
109            break;
110        }
111        start = end;
112    }
113
114    Ok(())
115}
116
117// note: this rule is not present in the original grammar
118// line-ending = newline / eof
119pub(crate) fn line_ending(input: &mut Input<'_>) -> PResult<()> {
120    alt((newline.value("\n"), eof.value("")))
121        .void()
122        .parse_next(input)
123}
124
125// note: this rule is not present in the original grammar
126// line-trailing = ws [comment] skip-line-ending
127pub(crate) fn line_trailing(input: &mut Input<'_>) -> PResult<std::ops::Range<usize>> {
128    terminated((ws, opt(comment)).span(), line_ending).parse_next(input)
129}
130
#[cfg(test)]
#[cfg(feature = "parse")]
#[cfg(feature = "display")]
mod test {
    use super::*;

    /// `ws_comment_newline` must consume each of these inputs in full.
    #[test]
    fn trivia() {
        // Whitespace is spelled with escapes so that it stays visible.
        let inputs = [
            "",
            " ",
            "\n",
            "\n# comment\n\n# comment2\n\n\n",
            "\n        ",
            "# comment\n# comment2\n\n\n   ",
        ];
        for input in inputs {
            dbg!(input);
            let outcome = ws_comment_newline.take().parse(new_input(input));
            assert!(outcome.is_ok(), "{:?}", outcome);
            // The taken slice has to be the entire input.
            assert_eq!(outcome.unwrap(), input.as_bytes());
        }
    }
}
167}