1 // This is a part of Chrono.
2 // See README.md and LICENSE.txt for details.
3 
4 /*!
5  * Various scanning routines for the parser.
6  */
7 
8 use super::{ParseResult, INVALID, OUT_OF_RANGE, TOO_SHORT};
9 use crate::Weekday;
10 
11 /// Tries to parse the non-negative number from `min` to `max` digits.
12 ///
13 /// The absence of digits at all is an unconditional error.
14 /// More than `max` digits are consumed up to the first `max` digits.
15 /// Any number that does not fit in `i64` is an error.
16 #[inline]
number(s: &str, min: usize, max: usize) -> ParseResult<(&str, i64)>17 pub(super) fn number(s: &str, min: usize, max: usize) -> ParseResult<(&str, i64)> {
18     assert!(min <= max);
19 
20     // We are only interested in ascii numbers, so we can work with the `str` as bytes. We stop on
21     // the first non-numeric byte, which may be another ascii character or beginning of multi-byte
22     // UTF-8 character.
23     let bytes = s.as_bytes();
24     if bytes.len() < min {
25         return Err(TOO_SHORT);
26     }
27 
28     let mut n = 0i64;
29     for (i, c) in bytes.iter().take(max).cloned().enumerate() {
30         // cloned() = copied()
31         if !c.is_ascii_digit() {
32             if i < min {
33                 return Err(INVALID);
34             } else {
35                 return Ok((&s[i..], n));
36             }
37         }
38 
39         n = match n.checked_mul(10).and_then(|n| n.checked_add((c - b'0') as i64)) {
40             Some(n) => n,
41             None => return Err(OUT_OF_RANGE),
42         };
43     }
44 
45     Ok((&s[core::cmp::min(max, bytes.len())..], n))
46 }
47 
48 /// Tries to consume at least one digits as a fractional second.
49 /// Returns the number of whole nanoseconds (0--999,999,999).
nanosecond(s: &str) -> ParseResult<(&str, i64)>50 pub(super) fn nanosecond(s: &str) -> ParseResult<(&str, i64)> {
51     // record the number of digits consumed for later scaling.
52     let origlen = s.len();
53     let (s, v) = number(s, 1, 9)?;
54     let consumed = origlen - s.len();
55 
56     // scale the number accordingly.
57     static SCALE: [i64; 10] =
58         [0, 100_000_000, 10_000_000, 1_000_000, 100_000, 10_000, 1_000, 100, 10, 1];
59     let v = v.checked_mul(SCALE[consumed]).ok_or(OUT_OF_RANGE)?;
60 
61     // if there are more than 9 digits, skip next digits.
62     let s = s.trim_start_matches(|c: char| c.is_ascii_digit());
63 
64     Ok((s, v))
65 }
66 
67 /// Tries to consume a fixed number of digits as a fractional second.
68 /// Returns the number of whole nanoseconds (0--999,999,999).
nanosecond_fixed(s: &str, digits: usize) -> ParseResult<(&str, i64)>69 pub(super) fn nanosecond_fixed(s: &str, digits: usize) -> ParseResult<(&str, i64)> {
70     // record the number of digits consumed for later scaling.
71     let (s, v) = number(s, digits, digits)?;
72 
73     // scale the number accordingly.
74     static SCALE: [i64; 10] =
75         [0, 100_000_000, 10_000_000, 1_000_000, 100_000, 10_000, 1_000, 100, 10, 1];
76     let v = v.checked_mul(SCALE[digits]).ok_or(OUT_OF_RANGE)?;
77 
78     Ok((s, v))
79 }
80 
81 /// Tries to parse the month index (0 through 11) with the first three ASCII letters.
short_month0(s: &str) -> ParseResult<(&str, u8)>82 pub(super) fn short_month0(s: &str) -> ParseResult<(&str, u8)> {
83     if s.len() < 3 {
84         return Err(TOO_SHORT);
85     }
86     let buf = s.as_bytes();
87     let month0 = match (buf[0] | 32, buf[1] | 32, buf[2] | 32) {
88         (b'j', b'a', b'n') => 0,
89         (b'f', b'e', b'b') => 1,
90         (b'm', b'a', b'r') => 2,
91         (b'a', b'p', b'r') => 3,
92         (b'm', b'a', b'y') => 4,
93         (b'j', b'u', b'n') => 5,
94         (b'j', b'u', b'l') => 6,
95         (b'a', b'u', b'g') => 7,
96         (b's', b'e', b'p') => 8,
97         (b'o', b'c', b't') => 9,
98         (b'n', b'o', b'v') => 10,
99         (b'd', b'e', b'c') => 11,
100         _ => return Err(INVALID),
101     };
102     Ok((&s[3..], month0))
103 }
104 
105 /// Tries to parse the weekday with the first three ASCII letters.
short_weekday(s: &str) -> ParseResult<(&str, Weekday)>106 pub(super) fn short_weekday(s: &str) -> ParseResult<(&str, Weekday)> {
107     if s.len() < 3 {
108         return Err(TOO_SHORT);
109     }
110     let buf = s.as_bytes();
111     let weekday = match (buf[0] | 32, buf[1] | 32, buf[2] | 32) {
112         (b'm', b'o', b'n') => Weekday::Mon,
113         (b't', b'u', b'e') => Weekday::Tue,
114         (b'w', b'e', b'd') => Weekday::Wed,
115         (b't', b'h', b'u') => Weekday::Thu,
116         (b'f', b'r', b'i') => Weekday::Fri,
117         (b's', b'a', b't') => Weekday::Sat,
118         (b's', b'u', b'n') => Weekday::Sun,
119         _ => return Err(INVALID),
120     };
121     Ok((&s[3..], weekday))
122 }
123 
124 /// Tries to parse the month index (0 through 11) with short or long month names.
125 /// It prefers long month names to short month names when both are possible.
short_or_long_month0(s: &str) -> ParseResult<(&str, u8)>126 pub(super) fn short_or_long_month0(s: &str) -> ParseResult<(&str, u8)> {
127     // lowercased month names, minus first three chars
128     static LONG_MONTH_SUFFIXES: [&[u8]; 12] = [
129         b"uary", b"ruary", b"ch", b"il", b"", b"e", b"y", b"ust", b"tember", b"ober", b"ember",
130         b"ember",
131     ];
132 
133     let (mut s, month0) = short_month0(s)?;
134 
135     // tries to consume the suffix if possible
136     let suffix = LONG_MONTH_SUFFIXES[month0 as usize];
137     if s.len() >= suffix.len() && s.as_bytes()[..suffix.len()].eq_ignore_ascii_case(suffix) {
138         s = &s[suffix.len()..];
139     }
140 
141     Ok((s, month0))
142 }
143 
144 /// Tries to parse the weekday with short or long weekday names.
145 /// It prefers long weekday names to short weekday names when both are possible.
short_or_long_weekday(s: &str) -> ParseResult<(&str, Weekday)>146 pub(super) fn short_or_long_weekday(s: &str) -> ParseResult<(&str, Weekday)> {
147     // lowercased weekday names, minus first three chars
148     static LONG_WEEKDAY_SUFFIXES: [&[u8]; 7] =
149         [b"day", b"sday", b"nesday", b"rsday", b"day", b"urday", b"day"];
150 
151     let (mut s, weekday) = short_weekday(s)?;
152 
153     // tries to consume the suffix if possible
154     let suffix = LONG_WEEKDAY_SUFFIXES[weekday.num_days_from_monday() as usize];
155     if s.len() >= suffix.len() && s.as_bytes()[..suffix.len()].eq_ignore_ascii_case(suffix) {
156         s = &s[suffix.len()..];
157     }
158 
159     Ok((s, weekday))
160 }
161 
162 /// Tries to consume exactly one given character.
char(s: &str, c1: u8) -> ParseResult<&str>163 pub(super) fn char(s: &str, c1: u8) -> ParseResult<&str> {
164     match s.as_bytes().first() {
165         Some(&c) if c == c1 => Ok(&s[1..]),
166         Some(_) => Err(INVALID),
167         None => Err(TOO_SHORT),
168     }
169 }
170 
171 /// Tries to consume one or more whitespace.
space(s: &str) -> ParseResult<&str>172 pub(super) fn space(s: &str) -> ParseResult<&str> {
173     let s_ = s.trim_start();
174     if s_.len() < s.len() {
175         Ok(s_)
176     } else if s.is_empty() {
177         Err(TOO_SHORT)
178     } else {
179         Err(INVALID)
180     }
181 }
182 
183 /// Consumes any number (including zero) of colon or spaces.
colon_or_space(s: &str) -> ParseResult<&str>184 pub(crate) fn colon_or_space(s: &str) -> ParseResult<&str> {
185     Ok(s.trim_start_matches(|c: char| c == ':' || c.is_whitespace()))
186 }
187 
188 /// Parse a timezone from `s` and return the offset in seconds.
189 ///
190 /// The `consume_colon` function is used to parse a mandatory or optional `:`
191 /// separator between hours offset and minutes offset.
192 ///
193 /// The `allow_missing_minutes` flag allows the timezone minutes offset to be
194 /// missing from `s`.
195 ///
196 /// The `allow_tz_minus_sign` flag allows the timezone offset negative character
197 /// to also be `−` MINUS SIGN (U+2212) in addition to the typical
198 /// ASCII-compatible `-` HYPHEN-MINUS (U+2D).
199 /// This is part of [RFC 3339 & ISO 8601].
200 ///
201 /// [RFC 3339 & ISO 8601]: https://en.wikipedia.org/w/index.php?title=ISO_8601&oldid=1114309368#Time_offsets_from_UTC
timezone_offset<F>( mut s: &str, mut consume_colon: F, allow_zulu: bool, allow_missing_minutes: bool, allow_tz_minus_sign: bool, ) -> ParseResult<(&str, i32)> where F: FnMut(&str) -> ParseResult<&str>,202 pub(crate) fn timezone_offset<F>(
203     mut s: &str,
204     mut consume_colon: F,
205     allow_zulu: bool,
206     allow_missing_minutes: bool,
207     allow_tz_minus_sign: bool,
208 ) -> ParseResult<(&str, i32)>
209 where
210     F: FnMut(&str) -> ParseResult<&str>,
211 {
212     if allow_zulu {
213         if let Some(&b'Z' | &b'z') = s.as_bytes().first() {
214             return Ok((&s[1..], 0));
215         }
216     }
217 
218     const fn digits(s: &str) -> ParseResult<(u8, u8)> {
219         let b = s.as_bytes();
220         if b.len() < 2 {
221             Err(TOO_SHORT)
222         } else {
223             Ok((b[0], b[1]))
224         }
225     }
226     let negative = match s.chars().next() {
227         Some('+') => {
228             // PLUS SIGN (U+2B)
229             s = &s['+'.len_utf8()..];
230 
231             false
232         }
233         Some('-') => {
234             // HYPHEN-MINUS (U+2D)
235             s = &s['-'.len_utf8()..];
236 
237             true
238         }
239         Some('−') => {
240             // MINUS SIGN (U+2212)
241             if !allow_tz_minus_sign {
242                 return Err(INVALID);
243             }
244             s = &s['−'.len_utf8()..];
245 
246             true
247         }
248         Some(_) => return Err(INVALID),
249         None => return Err(TOO_SHORT),
250     };
251 
252     // hours (00--99)
253     let hours = match digits(s)? {
254         (h1 @ b'0'..=b'9', h2 @ b'0'..=b'9') => i32::from((h1 - b'0') * 10 + (h2 - b'0')),
255         _ => return Err(INVALID),
256     };
257     s = &s[2..];
258 
259     // colons (and possibly other separators)
260     s = consume_colon(s)?;
261 
262     // minutes (00--59)
263     // if the next two items are digits then we have to add minutes
264     let minutes = if let Ok(ds) = digits(s) {
265         match ds {
266             (m1 @ b'0'..=b'5', m2 @ b'0'..=b'9') => i32::from((m1 - b'0') * 10 + (m2 - b'0')),
267             (b'6'..=b'9', b'0'..=b'9') => return Err(OUT_OF_RANGE),
268             _ => return Err(INVALID),
269         }
270     } else if allow_missing_minutes {
271         0
272     } else {
273         return Err(TOO_SHORT);
274     };
275     s = match s.len() {
276         len if len >= 2 => &s[2..],
277         0 => s,
278         _ => return Err(TOO_SHORT),
279     };
280 
281     let seconds = hours * 3600 + minutes * 60;
282     Ok((s, if negative { -seconds } else { seconds }))
283 }
284 
285 /// Same as `timezone_offset` but also allows for RFC 2822 legacy timezones.
286 /// May return `None` which indicates an insufficient offset data (i.e. `-0000`).
287 /// See [RFC 2822 Section 4.3].
288 ///
289 /// [RFC 2822 Section 4.3]: https://tools.ietf.org/html/rfc2822#section-4.3
timezone_offset_2822(s: &str) -> ParseResult<(&str, i32)>290 pub(super) fn timezone_offset_2822(s: &str) -> ParseResult<(&str, i32)> {
291     // tries to parse legacy time zone names
292     let upto = s.as_bytes().iter().position(|&c| !c.is_ascii_alphabetic()).unwrap_or(s.len());
293     if upto > 0 {
294         let name = &s.as_bytes()[..upto];
295         let s = &s[upto..];
296         let offset_hours = |o| Ok((s, o * 3600));
297         // RFC 2822 requires support for some named North America timezones, a small subset of all
298         // named timezones.
299         if name.eq_ignore_ascii_case(b"gmt")
300             || name.eq_ignore_ascii_case(b"ut")
301             || name.eq_ignore_ascii_case(b"z")
302         {
303             return offset_hours(0);
304         } else if name.eq_ignore_ascii_case(b"edt") {
305             return offset_hours(-4);
306         } else if name.eq_ignore_ascii_case(b"est") || name.eq_ignore_ascii_case(b"cdt") {
307             return offset_hours(-5);
308         } else if name.eq_ignore_ascii_case(b"cst") || name.eq_ignore_ascii_case(b"mdt") {
309             return offset_hours(-6);
310         } else if name.eq_ignore_ascii_case(b"mst") || name.eq_ignore_ascii_case(b"pdt") {
311             return offset_hours(-7);
312         } else if name.eq_ignore_ascii_case(b"pst") {
313             return offset_hours(-8);
314         } else if name.len() == 1 {
315             if let b'a'..=b'i' | b'k'..=b'y' | b'A'..=b'I' | b'K'..=b'Y' = name[0] {
316                 // recommended by RFC 2822: consume but treat it as -0000
317                 return Ok((s, 0));
318             }
319         }
320         Err(INVALID)
321     } else {
322         timezone_offset(s, |s| Ok(s), false, false, false)
323     }
324 }
325 
326 /// Tries to consume an RFC2822 comment including preceding ` `.
327 ///
328 /// Returns the remaining string after the closing parenthesis.
comment_2822(s: &str) -> ParseResult<(&str, ())>329 pub(super) fn comment_2822(s: &str) -> ParseResult<(&str, ())> {
330     use CommentState::*;
331 
332     let s = s.trim_start();
333 
334     let mut state = Start;
335     for (i, c) in s.bytes().enumerate() {
336         state = match (state, c) {
337             (Start, b'(') => Next(1),
338             (Next(1), b')') => return Ok((&s[i + 1..], ())),
339             (Next(depth), b'\\') => Escape(depth),
340             (Next(depth), b'(') => Next(depth + 1),
341             (Next(depth), b')') => Next(depth - 1),
342             (Next(depth), _) | (Escape(depth), _) => Next(depth),
343             _ => return Err(INVALID),
344         };
345     }
346 
347     Err(TOO_SHORT)
348 }
349 
350 enum CommentState {
351     Start,
352     Next(usize),
353     Escape(usize),
354 }
355 
#[cfg(test)]
mod tests {
    use super::{
        comment_2822, nanosecond, nanosecond_fixed, short_or_long_month0, short_or_long_weekday,
        timezone_offset_2822,
    };
    use crate::format::{INVALID, TOO_SHORT};
    use crate::Weekday;

    /// Table of (input, expected remainder or error) pairs for RFC 2822 comments.
    #[test]
    fn test_rfc2822_comments() {
        let cases = [
            // no comment at all
            ("", Err(TOO_SHORT)),
            (" ", Err(TOO_SHORT)),
            ("x", Err(INVALID)),
            ("(", Err(TOO_SHORT)),
            // simple comments and what is left behind them
            ("()", Ok("")),
            (" \r\n\t()", Ok("")),
            ("() ", Ok(" ")),
            ("()z", Ok("z")),
            ("(x)", Ok("")),
            // nesting
            ("(())", Ok("")),
            ("((()))", Ok("")),
            ("(x(x(x)x)x)", Ok("")),
            ("( x ( x ( x ) x ) x )", Ok("")),
            // escapes
            (r"(\)", Err(TOO_SHORT)),
            (r"(\()", Ok("")),
            (r"(\))", Ok("")),
            (r"(\\)", Ok("")),
            // sibling comments inside one outer comment
            ("(()())", Ok("")),
            ("( x ( x ) x ( x ) x )", Ok("")),
        ];

        for (input, expected) in cases.iter() {
            let actual = comment_2822(input).map(|(rest, ())| rest);
            assert_eq!(
                *expected, actual,
                "{:?} expected to produce {:?}, but produced {:?}.",
                input, expected, actual
            );
        }
    }

    /// Named zones are case-insensitive; numeric offsets and bad names are covered too.
    #[test]
    fn test_timezone_offset_2822() {
        let accepted: [(&str, i32); 4] =
            [("cSt", -21600), ("pSt", -28800), ("mSt", -25200), ("-1551", -57060)];
        for &(input, seconds) in accepted.iter() {
            assert_eq!(timezone_offset_2822(input).unwrap(), ("", seconds));
        }

        assert_eq!(timezone_offset_2822("Gp"), Err(INVALID));
    }

    /// Month abbreviations are case-insensitive and leave unrelated text untouched.
    #[test]
    fn test_short_or_long_month0() {
        let cases: [(&str, &str, u8); 7] = [
            ("JUn", "", 5),
            ("mAy", "", 4),
            ("AuG", "", 7),
            ("Aprâ", "â", 3),
            ("JUl", "", 6),
            ("mAr", "", 2),
            ("Jan", "", 0),
        ];
        for &(input, rest, month0) in cases.iter() {
            assert_eq!(short_or_long_month0(input).unwrap(), (rest, month0));
        }
    }

    /// A partial long weekday name falls back to the short match.
    #[test]
    fn test_short_or_long_weekday() {
        let (rest, weekday) = short_or_long_weekday("sAtu").unwrap();
        assert_eq!((rest, weekday), ("u", Weekday::Sat));

        let (rest, weekday) = short_or_long_weekday("thu").unwrap();
        assert_eq!((rest, weekday), ("", Weekday::Thu));
    }

    /// Zero digits on empty input succeeds; one required digit does not.
    #[test]
    fn test_nanosecond_fixed() {
        assert_eq!(nanosecond_fixed("", 0usize).unwrap(), ("", 0));
        assert!(nanosecond_fixed("", 1usize).is_err());
    }

    /// A single digit is scaled to nanoseconds; non-digit text is left in place.
    #[test]
    fn test_nanosecond() {
        let cases: [(&str, &str, i64); 2] = [("2Ù", "Ù", 200000000), ("8", "", 800000000)];
        for &(input, rest, nanos) in cases.iter() {
            assert_eq!(nanosecond(input).unwrap(), (rest, nanos));
        }
    }
}
437