1 use std::ops::RangeInclusive;
2 
3 use winnow::combinator::alt;
4 use winnow::combinator::eof;
5 use winnow::combinator::opt;
6 use winnow::combinator::repeat;
7 use winnow::combinator::terminated;
8 use winnow::prelude::*;
9 use winnow::token::one_of;
10 use winnow::token::take_while;
11 
12 use crate::parser::prelude::*;
13 
/// Converts `bytes` to a `&str`, validating the UTF-8 only in debug builds.
///
/// With `debug_assertions` enabled the bytes are checked with
/// [`std::str::from_utf8`]; otherwise the check is skipped entirely.
///
/// # Safety
///
/// `bytes` must be valid UTF-8. `safety_justification` should state why the
/// caller knows that to be true; it is used as the panic message when the
/// debug-build check fails.
///
/// # Panics
///
/// Panics when `debug_assertions` is enabled and `bytes` is not valid UTF-8.
pub(crate) unsafe fn from_utf8_unchecked<'b>(
    bytes: &'b [u8],
    safety_justification: &'static str,
) -> &'b str {
    if cfg!(debug_assertions) {
        // Catch problems more quickly when testing
        std::str::from_utf8(bytes).expect(safety_justification)
    } else {
        // SAFETY: the caller guarantees that `bytes` is valid UTF-8.
        unsafe { std::str::from_utf8_unchecked(bytes) }
    }
}
25 
26 // wschar = ( %x20 /              ; Space
27 //            %x09 )              ; Horizontal tab
/// Bytes accepted as `wschar`: space (`%x20`) and horizontal tab (`%x09`).
pub(crate) const WSCHAR: (u8, u8) = (0x20, 0x09);
29 
30 // ws = *wschar
ws<'i>(input: &mut Input<'i>) -> PResult<&'i str>31 pub(crate) fn ws<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
32     take_while(0.., WSCHAR)
33         .map(|b| unsafe { from_utf8_unchecked(b, "`is_wschar` filters out on-ASCII") })
34         .parse_next(input)
35 }
36 
37 // non-ascii = %x80-D7FF / %xE000-10FFFF
38 // - ASCII is 0xxxxxxx
39 // - First byte for UTF-8 is 11xxxxxx
40 // - Subsequent UTF-8 bytes are 10xxxxxx
/// Every byte with the high bit set, i.e. any lead or continuation byte of a
/// multi-byte UTF-8 sequence.
pub(crate) const NON_ASCII: RangeInclusive<u8> = RangeInclusive::new(0x80, 0xff);
42 
43 // non-eol = %x09 / %x20-7E / non-ascii
44 pub(crate) const NON_EOL: (u8, RangeInclusive<u8>, RangeInclusive<u8>) =
45     (0x09, 0x20..=0x7E, NON_ASCII);
46 
47 // comment-start-symbol = %x23 ; #
/// The byte that opens a comment: `#` (`%x23`).
pub(crate) const COMMENT_START_SYMBOL: u8 = 0x23;
49 
50 // comment = comment-start-symbol *non-eol
comment<'i>(input: &mut Input<'i>) -> PResult<&'i [u8]>51 pub(crate) fn comment<'i>(input: &mut Input<'i>) -> PResult<&'i [u8]> {
52     (COMMENT_START_SYMBOL, take_while(0.., NON_EOL))
53         .recognize()
54         .parse_next(input)
55 }
56 
57 // newline = ( %x0A /              ; LF
58 //             %x0D.0A )           ; CRLF
newline(input: &mut Input<'_>) -> PResult<u8>59 pub(crate) fn newline(input: &mut Input<'_>) -> PResult<u8> {
60     alt((
61         one_of(LF).value(b'\n'),
62         (one_of(CR), one_of(LF)).value(b'\n'),
63     ))
64     .parse_next(input)
65 }
/// Line feed (`%x0A`).
pub(crate) const LF: u8 = 0x0A;
/// Carriage return (`%x0D`).
pub(crate) const CR: u8 = 0x0D;
68 
69 // ws-newline       = *( wschar / newline )
ws_newline<'i>(input: &mut Input<'i>) -> PResult<&'i str>70 pub(crate) fn ws_newline<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
71     repeat(
72         0..,
73         alt((newline.value(&b"\n"[..]), take_while(1.., WSCHAR))),
74     )
75     .map(|()| ())
76     .recognize()
77     .map(|b| unsafe { from_utf8_unchecked(b, "`is_wschar` and `newline` filters out on-ASCII") })
78     .parse_next(input)
79 }
80 
81 // ws-newlines      = newline *( wschar / newline )
ws_newlines<'i>(input: &mut Input<'i>) -> PResult<&'i str>82 pub(crate) fn ws_newlines<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
83     (newline, ws_newline)
84         .recognize()
85         .map(|b| unsafe {
86             from_utf8_unchecked(b, "`is_wschar` and `newline` filters out on-ASCII")
87         })
88         .parse_next(input)
89 }
90 
91 // note: this rule is not present in the original grammar
92 // ws-comment-newline = *( ws-newline-nonempty / comment )
ws_comment_newline<'i>(input: &mut Input<'i>) -> PResult<&'i [u8]>93 pub(crate) fn ws_comment_newline<'i>(input: &mut Input<'i>) -> PResult<&'i [u8]> {
94     repeat(
95         0..,
96         alt((
97             repeat(
98                 1..,
99                 alt((take_while(1.., WSCHAR), newline.value(&b"\n"[..]))),
100             )
101             .map(|()| ()),
102             comment.value(()),
103         )),
104     )
105     .map(|()| ())
106     .recognize()
107     .parse_next(input)
108 }
109 
110 // note: this rule is not present in the original grammar
111 // line-ending = newline / eof
line_ending<'i>(input: &mut Input<'i>) -> PResult<&'i str>112 pub(crate) fn line_ending<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
113     alt((newline.value("\n"), eof.value(""))).parse_next(input)
114 }
115 
116 // note: this rule is not present in the original grammar
117 // line-trailing = ws [comment] skip-line-ending
line_trailing(input: &mut Input<'_>) -> PResult<std::ops::Range<usize>>118 pub(crate) fn line_trailing(input: &mut Input<'_>) -> PResult<std::ops::Range<usize>> {
119     terminated((ws, opt(comment)).span(), line_ending).parse_next(input)
120 }
121 
#[cfg(test)]
#[cfg(feature = "parse")]
#[cfg(feature = "display")]
mod test {
    use super::*;

    /// `ws_comment_newline` must accept each trivia-only input and return it
    /// back unchanged.
    #[test]
    fn trivia() {
        let cases = [
            // empty input
            "",
            // a single space
            " ",
            // a single newline
            "\n",
            // comments separated by blank lines
            "\n# comment\n\n# comment2\n\n\n",
            // newline followed by indentation
            "\n        ",
            // leading comment, trailing indentation
            "# comment\n# comment2\n\n\n   ",
        ];
        for input in cases {
            dbg!(input);
            let parsed = ws_comment_newline.parse(new_input(input));
            assert!(parsed.is_ok(), "{:?}", parsed);
            let parsed = parsed.unwrap();
            assert_eq!(parsed, input.as_bytes());
        }
    }
}
159