1 /*!
2 This module provides a regular expression parser.
3 */
4 
5 use std::borrow::Borrow;
6 use std::cell::{Cell, RefCell};
7 use std::mem;
8 use std::result;
9 
10 use crate::ast::{self, Ast, Position, Span};
11 use crate::either::Either;
12 
13 use crate::is_meta_character;
14 
15 type Result<T> = result::Result<T, ast::Error>;
16 
17 /// A primitive is an expression with no sub-expressions. This includes
18 /// literals, assertions and non-set character classes. This representation
19 /// is used as intermediate state in the parser.
20 ///
21 /// This does not include ASCII character classes, since they can only appear
22 /// within a set character class.
23 #[derive(Clone, Debug, Eq, PartialEq)]
24 enum Primitive {
25     Literal(ast::Literal),
26     Assertion(ast::Assertion),
27     Dot(Span),
28     Perl(ast::ClassPerl),
29     Unicode(ast::ClassUnicode),
30 }
31 
32 impl Primitive {
33     /// Return the span of this primitive.
span(&self) -> &Span34     fn span(&self) -> &Span {
35         match *self {
36             Primitive::Literal(ref x) => &x.span,
37             Primitive::Assertion(ref x) => &x.span,
38             Primitive::Dot(ref span) => span,
39             Primitive::Perl(ref x) => &x.span,
40             Primitive::Unicode(ref x) => &x.span,
41         }
42     }
43 
44     /// Convert this primitive into a proper AST.
into_ast(self) -> Ast45     fn into_ast(self) -> Ast {
46         match self {
47             Primitive::Literal(lit) => Ast::Literal(lit),
48             Primitive::Assertion(assert) => Ast::Assertion(assert),
49             Primitive::Dot(span) => Ast::Dot(span),
50             Primitive::Perl(cls) => Ast::Class(ast::Class::Perl(cls)),
51             Primitive::Unicode(cls) => Ast::Class(ast::Class::Unicode(cls)),
52         }
53     }
54 
55     /// Convert this primitive into an item in a character class.
56     ///
57     /// If this primitive is not a legal item (i.e., an assertion or a dot),
58     /// then return an error.
into_class_set_item<P: Borrow<Parser>>( self, p: &ParserI<'_, P>, ) -> Result<ast::ClassSetItem>59     fn into_class_set_item<P: Borrow<Parser>>(
60         self,
61         p: &ParserI<'_, P>,
62     ) -> Result<ast::ClassSetItem> {
63         use self::Primitive::*;
64         use crate::ast::ClassSetItem;
65 
66         match self {
67             Literal(lit) => Ok(ClassSetItem::Literal(lit)),
68             Perl(cls) => Ok(ClassSetItem::Perl(cls)),
69             Unicode(cls) => Ok(ClassSetItem::Unicode(cls)),
70             x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)),
71         }
72     }
73 
74     /// Convert this primitive into a literal in a character class. In
75     /// particular, literals are the only valid items that can appear in
76     /// ranges.
77     ///
78     /// If this primitive is not a legal item (i.e., a class, assertion or a
79     /// dot), then return an error.
into_class_literal<P: Borrow<Parser>>( self, p: &ParserI<'_, P>, ) -> Result<ast::Literal>80     fn into_class_literal<P: Borrow<Parser>>(
81         self,
82         p: &ParserI<'_, P>,
83     ) -> Result<ast::Literal> {
84         use self::Primitive::*;
85 
86         match self {
87             Literal(lit) => Ok(lit),
88             x => Err(p.error(*x.span(), ast::ErrorKind::ClassRangeLiteral)),
89         }
90     }
91 }
92 
93 /// Returns true if the given character is a hexadecimal digit.
is_hex(c: char) -> bool94 fn is_hex(c: char) -> bool {
95     ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')
96 }
97 
98 /// Returns true if the given character is a valid in a capture group name.
99 ///
100 /// If `first` is true, then `c` is treated as the first character in the
101 /// group name (which must be alphabetic or underscore).
is_capture_char(c: char, first: bool) -> bool102 fn is_capture_char(c: char, first: bool) -> bool {
103     c == '_'
104         || (!first
105             && (('0' <= c && c <= '9') || c == '.' || c == '[' || c == ']'))
106         || ('A' <= c && c <= 'Z')
107         || ('a' <= c && c <= 'z')
108 }
109 
110 /// A builder for a regular expression parser.
111 ///
112 /// This builder permits modifying configuration options for the parser.
113 #[derive(Clone, Debug)]
114 pub struct ParserBuilder {
115     ignore_whitespace: bool,
116     nest_limit: u32,
117     octal: bool,
118 }
119 
120 impl Default for ParserBuilder {
default() -> ParserBuilder121     fn default() -> ParserBuilder {
122         ParserBuilder::new()
123     }
124 }
125 
126 impl ParserBuilder {
127     /// Create a new parser builder with a default configuration.
new() -> ParserBuilder128     pub fn new() -> ParserBuilder {
129         ParserBuilder {
130             ignore_whitespace: false,
131             nest_limit: 250,
132             octal: false,
133         }
134     }
135 
136     /// Build a parser from this configuration with the given pattern.
build(&self) -> Parser137     pub fn build(&self) -> Parser {
138         Parser {
139             pos: Cell::new(Position { offset: 0, line: 1, column: 1 }),
140             capture_index: Cell::new(0),
141             nest_limit: self.nest_limit,
142             octal: self.octal,
143             initial_ignore_whitespace: self.ignore_whitespace,
144             ignore_whitespace: Cell::new(self.ignore_whitespace),
145             comments: RefCell::new(vec![]),
146             stack_group: RefCell::new(vec![]),
147             stack_class: RefCell::new(vec![]),
148             capture_names: RefCell::new(vec![]),
149             scratch: RefCell::new(String::new()),
150         }
151     }
152 
153     /// Set the nesting limit for this parser.
154     ///
155     /// The nesting limit controls how deep the abstract syntax tree is allowed
156     /// to be. If the AST exceeds the given limit (e.g., with too many nested
157     /// groups), then an error is returned by the parser.
158     ///
159     /// The purpose of this limit is to act as a heuristic to prevent stack
160     /// overflow for consumers that do structural induction on an `Ast` using
161     /// explicit recursion. While this crate never does this (instead using
162     /// constant stack space and moving the call stack to the heap), other
163     /// crates may.
164     ///
165     /// This limit is not checked until the entire Ast is parsed. Therefore,
166     /// if callers want to put a limit on the amount of heap space used, then
167     /// they should impose a limit on the length, in bytes, of the concrete
168     /// pattern string. In particular, this is viable since this parser
169     /// implementation will limit itself to heap space proportional to the
170     /// length of the pattern string.
171     ///
172     /// Note that a nest limit of `0` will return a nest limit error for most
173     /// patterns but not all. For example, a nest limit of `0` permits `a` but
174     /// not `ab`, since `ab` requires a concatenation, which results in a nest
175     /// depth of `1`. In general, a nest limit is not something that manifests
176     /// in an obvious way in the concrete syntax, therefore, it should not be
177     /// used in a granular way.
nest_limit(&mut self, limit: u32) -> &mut ParserBuilder178     pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder {
179         self.nest_limit = limit;
180         self
181     }
182 
183     /// Whether to support octal syntax or not.
184     ///
185     /// Octal syntax is a little-known way of uttering Unicode codepoints in
186     /// a regular expression. For example, `a`, `\x61`, `\u0061` and
187     /// `\141` are all equivalent regular expressions, where the last example
188     /// shows octal syntax.
189     ///
190     /// While supporting octal syntax isn't in and of itself a problem, it does
191     /// make good error messages harder. That is, in PCRE based regex engines,
192     /// syntax like `\0` invokes a backreference, which is explicitly
193     /// unsupported in Rust's regex engine. However, many users expect it to
194     /// be supported. Therefore, when octal support is disabled, the error
195     /// message will explicitly mention that backreferences aren't supported.
196     ///
197     /// Octal syntax is disabled by default.
octal(&mut self, yes: bool) -> &mut ParserBuilder198     pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder {
199         self.octal = yes;
200         self
201     }
202 
203     /// Enable verbose mode in the regular expression.
204     ///
205     /// When enabled, verbose mode permits insignificant whitespace in many
206     /// places in the regular expression, as well as comments. Comments are
207     /// started using `#` and continue until the end of the line.
208     ///
209     /// By default, this is disabled. It may be selectively enabled in the
210     /// regular expression by using the `x` flag regardless of this setting.
ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder211     pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder {
212         self.ignore_whitespace = yes;
213         self
214     }
215 }
216 
217 /// A regular expression parser.
218 ///
219 /// This parses a string representation of a regular expression into an
220 /// abstract syntax tree. The size of the tree is proportional to the length
221 /// of the regular expression pattern.
222 ///
223 /// A `Parser` can be configured in more detail via a
224 /// [`ParserBuilder`](struct.ParserBuilder.html).
225 #[derive(Clone, Debug)]
226 pub struct Parser {
227     /// The current position of the parser.
228     pos: Cell<Position>,
229     /// The current capture index.
230     capture_index: Cell<u32>,
231     /// The maximum number of open parens/brackets allowed. If the parser
232     /// exceeds this number, then an error is returned.
233     nest_limit: u32,
234     /// Whether to support octal syntax or not. When `false`, the parser will
235     /// return an error helpfully pointing out that backreferences are not
236     /// supported.
237     octal: bool,
238     /// The initial setting for `ignore_whitespace` as provided by
239     /// `ParserBuilder`. It is used when resetting the parser's state.
240     initial_ignore_whitespace: bool,
241     /// Whether whitespace should be ignored. When enabled, comments are
242     /// also permitted.
243     ignore_whitespace: Cell<bool>,
244     /// A list of comments, in order of appearance.
245     comments: RefCell<Vec<ast::Comment>>,
246     /// A stack of grouped sub-expressions, including alternations.
247     stack_group: RefCell<Vec<GroupState>>,
248     /// A stack of nested character classes. This is only non-empty when
249     /// parsing a class.
250     stack_class: RefCell<Vec<ClassState>>,
251     /// A sorted sequence of capture names. This is used to detect duplicate
252     /// capture names and report an error if one is detected.
253     capture_names: RefCell<Vec<ast::CaptureName>>,
254     /// A scratch buffer used in various places. Mostly this is used to
255     /// accumulate relevant characters from parts of a pattern.
256     scratch: RefCell<String>,
257 }
258 
259 /// ParserI is the internal parser implementation.
260 ///
261 /// We use this separate type so that we can carry the provided pattern string
262 /// along with us. In particular, a `Parser` internal state is not tied to any
263 /// one pattern, but `ParserI` is.
264 ///
265 /// This type also lets us use `ParserI<&Parser>` in production code while
266 /// retaining the convenience of `ParserI<Parser>` for tests, which sometimes
267 /// work against the internal interface of the parser.
268 #[derive(Clone, Debug)]
269 struct ParserI<'s, P> {
270     /// The parser state/configuration.
271     parser: P,
272     /// The full regular expression provided by the user.
273     pattern: &'s str,
274 }
275 
276 /// GroupState represents a single stack frame while parsing nested groups
277 /// and alternations. Each frame records the state up to an opening parenthesis
278 /// or a alternating bracket `|`.
279 #[derive(Clone, Debug)]
280 enum GroupState {
281     /// This state is pushed whenever an opening group is found.
282     Group {
283         /// The concatenation immediately preceding the opening group.
284         concat: ast::Concat,
285         /// The group that has been opened. Its sub-AST is always empty.
286         group: ast::Group,
287         /// Whether this group has the `x` flag enabled or not.
288         ignore_whitespace: bool,
289     },
290     /// This state is pushed whenever a new alternation branch is found. If
291     /// an alternation branch is found and this state is at the top of the
292     /// stack, then this state should be modified to include the new
293     /// alternation.
294     Alternation(ast::Alternation),
295 }
296 
297 /// ClassState represents a single stack frame while parsing character classes.
298 /// Each frame records the state up to an intersection, difference, symmetric
299 /// difference or nested class.
300 ///
301 /// Note that a parser's character class stack is only non-empty when parsing
302 /// a character class. In all other cases, it is empty.
303 #[derive(Clone, Debug)]
304 enum ClassState {
305     /// This state is pushed whenever an opening bracket is found.
306     Open {
307         /// The union of class items immediately preceding this class.
308         union: ast::ClassSetUnion,
309         /// The class that has been opened. Typically this just corresponds
310         /// to the `[`, but it can also include `[^` since `^` indicates
311         /// negation of the class.
312         set: ast::ClassBracketed,
313     },
314     /// This state is pushed when a operator is seen. When popped, the stored
315     /// set becomes the left hand side of the operator.
316     Op {
317         /// The type of the operation, i.e., &&, -- or ~~.
318         kind: ast::ClassSetBinaryOpKind,
319         /// The left-hand side of the operator.
320         lhs: ast::ClassSet,
321     },
322 }
323 
324 impl Parser {
325     /// Create a new parser with a default configuration.
326     ///
327     /// The parser can be run with either the `parse` or `parse_with_comments`
328     /// methods. The parse methods return an abstract syntax tree.
329     ///
330     /// To set configuration options on the parser, use
331     /// [`ParserBuilder`](struct.ParserBuilder.html).
new() -> Parser332     pub fn new() -> Parser {
333         ParserBuilder::new().build()
334     }
335 
336     /// Parse the regular expression into an abstract syntax tree.
parse(&mut self, pattern: &str) -> Result<Ast>337     pub fn parse(&mut self, pattern: &str) -> Result<Ast> {
338         ParserI::new(self, pattern).parse()
339     }
340 
341     /// Parse the regular expression and return an abstract syntax tree with
342     /// all of the comments found in the pattern.
parse_with_comments( &mut self, pattern: &str, ) -> Result<ast::WithComments>343     pub fn parse_with_comments(
344         &mut self,
345         pattern: &str,
346     ) -> Result<ast::WithComments> {
347         ParserI::new(self, pattern).parse_with_comments()
348     }
349 
350     /// Reset the internal state of a parser.
351     ///
352     /// This is called at the beginning of every parse. This prevents the
353     /// parser from running with inconsistent state (say, if a previous
354     /// invocation returned an error and the parser is reused).
reset(&self)355     fn reset(&self) {
356         // These settings should be in line with the construction
357         // in `ParserBuilder::build`.
358         self.pos.set(Position { offset: 0, line: 1, column: 1 });
359         self.ignore_whitespace.set(self.initial_ignore_whitespace);
360         self.comments.borrow_mut().clear();
361         self.stack_group.borrow_mut().clear();
362         self.stack_class.borrow_mut().clear();
363     }
364 }
365 
366 impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
367     /// Build an internal parser from a parser configuration and a pattern.
new(parser: P, pattern: &'s str) -> ParserI<'s, P>368     fn new(parser: P, pattern: &'s str) -> ParserI<'s, P> {
369         ParserI { parser, pattern }
370     }
371 
372     /// Return a reference to the parser state.
parser(&self) -> &Parser373     fn parser(&self) -> &Parser {
374         self.parser.borrow()
375     }
376 
377     /// Return a reference to the pattern being parsed.
pattern(&self) -> &str378     fn pattern(&self) -> &str {
379         self.pattern.borrow()
380     }
381 
382     /// Create a new error with the given span and error type.
error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error383     fn error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error {
384         ast::Error { kind, pattern: self.pattern().to_string(), span }
385     }
386 
387     /// Return the current offset of the parser.
388     ///
389     /// The offset starts at `0` from the beginning of the regular expression
390     /// pattern string.
offset(&self) -> usize391     fn offset(&self) -> usize {
392         self.parser().pos.get().offset
393     }
394 
395     /// Return the current line number of the parser.
396     ///
397     /// The line number starts at `1`.
line(&self) -> usize398     fn line(&self) -> usize {
399         self.parser().pos.get().line
400     }
401 
402     /// Return the current column of the parser.
403     ///
404     /// The column number starts at `1` and is reset whenever a `\n` is seen.
column(&self) -> usize405     fn column(&self) -> usize {
406         self.parser().pos.get().column
407     }
408 
409     /// Return the next capturing index. Each subsequent call increments the
410     /// internal index.
411     ///
412     /// The span given should correspond to the location of the opening
413     /// parenthesis.
414     ///
415     /// If the capture limit is exceeded, then an error is returned.
next_capture_index(&self, span: Span) -> Result<u32>416     fn next_capture_index(&self, span: Span) -> Result<u32> {
417         let current = self.parser().capture_index.get();
418         let i = current.checked_add(1).ok_or_else(|| {
419             self.error(span, ast::ErrorKind::CaptureLimitExceeded)
420         })?;
421         self.parser().capture_index.set(i);
422         Ok(i)
423     }
424 
425     /// Adds the given capture name to this parser. If this capture name has
426     /// already been used, then an error is returned.
add_capture_name(&self, cap: &ast::CaptureName) -> Result<()>427     fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> {
428         let mut names = self.parser().capture_names.borrow_mut();
429         match names
430             .binary_search_by_key(&cap.name.as_str(), |c| c.name.as_str())
431         {
432             Err(i) => {
433                 names.insert(i, cap.clone());
434                 Ok(())
435             }
436             Ok(i) => Err(self.error(
437                 cap.span,
438                 ast::ErrorKind::GroupNameDuplicate { original: names[i].span },
439             )),
440         }
441     }
442 
443     /// Return whether the parser should ignore whitespace or not.
ignore_whitespace(&self) -> bool444     fn ignore_whitespace(&self) -> bool {
445         self.parser().ignore_whitespace.get()
446     }
447 
448     /// Return the character at the current position of the parser.
449     ///
450     /// This panics if the current position does not point to a valid char.
char(&self) -> char451     fn char(&self) -> char {
452         self.char_at(self.offset())
453     }
454 
455     /// Return the character at the given position.
456     ///
457     /// This panics if the given position does not point to a valid char.
char_at(&self, i: usize) -> char458     fn char_at(&self, i: usize) -> char {
459         self.pattern()[i..]
460             .chars()
461             .next()
462             .unwrap_or_else(|| panic!("expected char at offset {}", i))
463     }
464 
465     /// Bump the parser to the next Unicode scalar value.
466     ///
467     /// If the end of the input has been reached, then `false` is returned.
bump(&self) -> bool468     fn bump(&self) -> bool {
469         if self.is_eof() {
470             return false;
471         }
472         let Position { mut offset, mut line, mut column } = self.pos();
473         if self.char() == '\n' {
474             line = line.checked_add(1).unwrap();
475             column = 1;
476         } else {
477             column = column.checked_add(1).unwrap();
478         }
479         offset += self.char().len_utf8();
480         self.parser().pos.set(Position { offset, line, column });
481         self.pattern()[self.offset()..].chars().next().is_some()
482     }
483 
484     /// If the substring starting at the current position of the parser has
485     /// the given prefix, then bump the parser to the character immediately
486     /// following the prefix and return true. Otherwise, don't bump the parser
487     /// and return false.
bump_if(&self, prefix: &str) -> bool488     fn bump_if(&self, prefix: &str) -> bool {
489         if self.pattern()[self.offset()..].starts_with(prefix) {
490             for _ in 0..prefix.chars().count() {
491                 self.bump();
492             }
493             true
494         } else {
495             false
496         }
497     }
498 
499     /// Returns true if and only if the parser is positioned at a look-around
500     /// prefix. The conditions under which this returns true must always
501     /// correspond to a regular expression that would otherwise be consider
502     /// invalid.
503     ///
504     /// This should only be called immediately after parsing the opening of
505     /// a group or a set of flags.
is_lookaround_prefix(&self) -> bool506     fn is_lookaround_prefix(&self) -> bool {
507         self.bump_if("?=")
508             || self.bump_if("?!")
509             || self.bump_if("?<=")
510             || self.bump_if("?<!")
511     }
512 
513     /// Bump the parser, and if the `x` flag is enabled, bump through any
514     /// subsequent spaces. Return true if and only if the parser is not at
515     /// EOF.
bump_and_bump_space(&self) -> bool516     fn bump_and_bump_space(&self) -> bool {
517         if !self.bump() {
518             return false;
519         }
520         self.bump_space();
521         !self.is_eof()
522     }
523 
524     /// If the `x` flag is enabled (i.e., whitespace insensitivity with
525     /// comments), then this will advance the parser through all whitespace
526     /// and comments to the next non-whitespace non-comment byte.
527     ///
528     /// If the `x` flag is disabled, then this is a no-op.
529     ///
530     /// This should be used selectively throughout the parser where
531     /// arbitrary whitespace is permitted when the `x` flag is enabled. For
532     /// example, `{   5  , 6}` is equivalent to `{5,6}`.
bump_space(&self)533     fn bump_space(&self) {
534         if !self.ignore_whitespace() {
535             return;
536         }
537         while !self.is_eof() {
538             if self.char().is_whitespace() {
539                 self.bump();
540             } else if self.char() == '#' {
541                 let start = self.pos();
542                 let mut comment_text = String::new();
543                 self.bump();
544                 while !self.is_eof() {
545                     let c = self.char();
546                     self.bump();
547                     if c == '\n' {
548                         break;
549                     }
550                     comment_text.push(c);
551                 }
552                 let comment = ast::Comment {
553                     span: Span::new(start, self.pos()),
554                     comment: comment_text,
555                 };
556                 self.parser().comments.borrow_mut().push(comment);
557             } else {
558                 break;
559             }
560         }
561     }
562 
563     /// Peek at the next character in the input without advancing the parser.
564     ///
565     /// If the input has been exhausted, then this returns `None`.
peek(&self) -> Option<char>566     fn peek(&self) -> Option<char> {
567         if self.is_eof() {
568             return None;
569         }
570         self.pattern()[self.offset() + self.char().len_utf8()..].chars().next()
571     }
572 
573     /// Like peek, but will ignore spaces when the parser is in whitespace
574     /// insensitive mode.
peek_space(&self) -> Option<char>575     fn peek_space(&self) -> Option<char> {
576         if !self.ignore_whitespace() {
577             return self.peek();
578         }
579         if self.is_eof() {
580             return None;
581         }
582         let mut start = self.offset() + self.char().len_utf8();
583         let mut in_comment = false;
584         for (i, c) in self.pattern()[start..].char_indices() {
585             if c.is_whitespace() {
586                 continue;
587             } else if !in_comment && c == '#' {
588                 in_comment = true;
589             } else if in_comment && c == '\n' {
590                 in_comment = false;
591             } else {
592                 start += i;
593                 break;
594             }
595         }
596         self.pattern()[start..].chars().next()
597     }
598 
599     /// Returns true if the next call to `bump` would return false.
is_eof(&self) -> bool600     fn is_eof(&self) -> bool {
601         self.offset() == self.pattern().len()
602     }
603 
604     /// Return the current position of the parser, which includes the offset,
605     /// line and column.
pos(&self) -> Position606     fn pos(&self) -> Position {
607         self.parser().pos.get()
608     }
609 
610     /// Create a span at the current position of the parser. Both the start
611     /// and end of the span are set.
span(&self) -> Span612     fn span(&self) -> Span {
613         Span::splat(self.pos())
614     }
615 
616     /// Create a span that covers the current character.
span_char(&self) -> Span617     fn span_char(&self) -> Span {
618         let mut next = Position {
619             offset: self.offset().checked_add(self.char().len_utf8()).unwrap(),
620             line: self.line(),
621             column: self.column().checked_add(1).unwrap(),
622         };
623         if self.char() == '\n' {
624             next.line += 1;
625             next.column = 1;
626         }
627         Span::new(self.pos(), next)
628     }
629 
630     /// Parse and push a single alternation on to the parser's internal stack.
631     /// If the top of the stack already has an alternation, then add to that
632     /// instead of pushing a new one.
633     ///
634     /// The concatenation given corresponds to a single alternation branch.
635     /// The concatenation returned starts the next branch and is empty.
636     ///
637     /// This assumes the parser is currently positioned at `|` and will advance
638     /// the parser to the character following `|`.
639     #[inline(never)]
push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat>640     fn push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
641         assert_eq!(self.char(), '|');
642         concat.span.end = self.pos();
643         self.push_or_add_alternation(concat);
644         self.bump();
645         Ok(ast::Concat { span: self.span(), asts: vec![] })
646     }
647 
648     /// Pushes or adds the given branch of an alternation to the parser's
649     /// internal stack of state.
push_or_add_alternation(&self, concat: ast::Concat)650     fn push_or_add_alternation(&self, concat: ast::Concat) {
651         use self::GroupState::*;
652 
653         let mut stack = self.parser().stack_group.borrow_mut();
654         if let Some(&mut Alternation(ref mut alts)) = stack.last_mut() {
655             alts.asts.push(concat.into_ast());
656             return;
657         }
658         stack.push(Alternation(ast::Alternation {
659             span: Span::new(concat.span.start, self.pos()),
660             asts: vec![concat.into_ast()],
661         }));
662     }
663 
664     /// Parse and push a group AST (and its parent concatenation) on to the
665     /// parser's internal stack. Return a fresh concatenation corresponding
666     /// to the group's sub-AST.
667     ///
668     /// If a set of flags was found (with no group), then the concatenation
669     /// is returned with that set of flags added.
670     ///
671     /// This assumes that the parser is currently positioned on the opening
672     /// parenthesis. It advances the parser to the character at the start
673     /// of the sub-expression (or adjoining expression).
674     ///
675     /// If there was a problem parsing the start of the group, then an error
676     /// is returned.
677     #[inline(never)]
push_group(&self, mut concat: ast::Concat) -> Result<ast::Concat>678     fn push_group(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
679         assert_eq!(self.char(), '(');
680         match self.parse_group()? {
681             Either::Left(set) => {
682                 let ignore = set.flags.flag_state(ast::Flag::IgnoreWhitespace);
683                 if let Some(v) = ignore {
684                     self.parser().ignore_whitespace.set(v);
685                 }
686 
687                 concat.asts.push(Ast::Flags(set));
688                 Ok(concat)
689             }
690             Either::Right(group) => {
691                 let old_ignore_whitespace = self.ignore_whitespace();
692                 let new_ignore_whitespace = group
693                     .flags()
694                     .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace))
695                     .unwrap_or(old_ignore_whitespace);
696                 self.parser().stack_group.borrow_mut().push(
697                     GroupState::Group {
698                         concat,
699                         group,
700                         ignore_whitespace: old_ignore_whitespace,
701                     },
702                 );
703                 self.parser().ignore_whitespace.set(new_ignore_whitespace);
704                 Ok(ast::Concat { span: self.span(), asts: vec![] })
705             }
706         }
707     }
708 
709     /// Pop a group AST from the parser's internal stack and set the group's
710     /// AST to the given concatenation. Return the concatenation containing
711     /// the group.
712     ///
713     /// This assumes that the parser is currently positioned on the closing
714     /// parenthesis and advances the parser to the character following the `)`.
715     ///
716     /// If no such group could be popped, then an unopened group error is
717     /// returned.
718     #[inline(never)]
pop_group(&self, mut group_concat: ast::Concat) -> Result<ast::Concat>719     fn pop_group(&self, mut group_concat: ast::Concat) -> Result<ast::Concat> {
720         use self::GroupState::*;
721 
722         assert_eq!(self.char(), ')');
723         let mut stack = self.parser().stack_group.borrow_mut();
724         let (mut prior_concat, mut group, ignore_whitespace, alt) = match stack
725             .pop()
726         {
727             Some(Group { concat, group, ignore_whitespace }) => {
728                 (concat, group, ignore_whitespace, None)
729             }
730             Some(Alternation(alt)) => match stack.pop() {
731                 Some(Group { concat, group, ignore_whitespace }) => {
732                     (concat, group, ignore_whitespace, Some(alt))
733                 }
734                 None | Some(Alternation(_)) => {
735                     return Err(self.error(
736                         self.span_char(),
737                         ast::ErrorKind::GroupUnopened,
738                     ));
739                 }
740             },
741             None => {
742                 return Err(self
743                     .error(self.span_char(), ast::ErrorKind::GroupUnopened));
744             }
745         };
746         self.parser().ignore_whitespace.set(ignore_whitespace);
747         group_concat.span.end = self.pos();
748         self.bump();
749         group.span.end = self.pos();
750         match alt {
751             Some(mut alt) => {
752                 alt.span.end = group_concat.span.end;
753                 alt.asts.push(group_concat.into_ast());
754                 group.ast = Box::new(alt.into_ast());
755             }
756             None => {
757                 group.ast = Box::new(group_concat.into_ast());
758             }
759         }
760         prior_concat.asts.push(Ast::Group(group));
761         Ok(prior_concat)
762     }
763 
764     /// Pop the last state from the parser's internal stack, if it exists, and
765     /// add the given concatenation to it. There either must be no state or a
766     /// single alternation item on the stack. Any other scenario produces an
767     /// error.
768     ///
769     /// This assumes that the parser has advanced to the end.
770     #[inline(never)]
pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast>771     fn pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast> {
772         concat.span.end = self.pos();
773         let mut stack = self.parser().stack_group.borrow_mut();
774         let ast = match stack.pop() {
775             None => Ok(concat.into_ast()),
776             Some(GroupState::Alternation(mut alt)) => {
777                 alt.span.end = self.pos();
778                 alt.asts.push(concat.into_ast());
779                 Ok(Ast::Alternation(alt))
780             }
781             Some(GroupState::Group { group, .. }) => {
782                 return Err(
783                     self.error(group.span, ast::ErrorKind::GroupUnclosed)
784                 );
785             }
786         };
787         // If we try to pop again, there should be nothing.
788         match stack.pop() {
789             None => ast,
790             Some(GroupState::Alternation(_)) => {
791                 // This unreachable is unfortunate. This case can't happen
792                 // because the only way we can be here is if there were two
793                 // `GroupState::Alternation`s adjacent in the parser's stack,
794                 // which we guarantee to never happen because we never push a
795                 // `GroupState::Alternation` if one is already at the top of
796                 // the stack.
797                 unreachable!()
798             }
799             Some(GroupState::Group { group, .. }) => {
800                 Err(self.error(group.span, ast::ErrorKind::GroupUnclosed))
801             }
802         }
803     }
804 
805     /// Parse the opening of a character class and push the current class
806     /// parsing context onto the parser's stack. This assumes that the parser
807     /// is positioned at an opening `[`. The given union should correspond to
808     /// the union of set items built up before seeing the `[`.
809     ///
810     /// If there was a problem parsing the opening of the class, then an error
811     /// is returned. Otherwise, a new union of set items for the class is
812     /// returned (which may be populated with either a `]` or a `-`).
813     #[inline(never)]
push_class_open( &self, parent_union: ast::ClassSetUnion, ) -> Result<ast::ClassSetUnion>814     fn push_class_open(
815         &self,
816         parent_union: ast::ClassSetUnion,
817     ) -> Result<ast::ClassSetUnion> {
818         assert_eq!(self.char(), '[');
819 
820         let (nested_set, nested_union) = self.parse_set_class_open()?;
821         self.parser()
822             .stack_class
823             .borrow_mut()
824             .push(ClassState::Open { union: parent_union, set: nested_set });
825         Ok(nested_union)
826     }
827 
828     /// Parse the end of a character class set and pop the character class
829     /// parser stack. The union given corresponds to the last union built
830     /// before seeing the closing `]`. The union returned corresponds to the
831     /// parent character class set with the nested class added to it.
832     ///
833     /// This assumes that the parser is positioned at a `]` and will advance
834     /// the parser to the byte immediately following the `]`.
835     ///
836     /// If the stack is empty after popping, then this returns the final
837     /// "top-level" character class AST (where a "top-level" character class
838     /// is one that is not nested inside any other character class).
839     ///
840     /// If there is no corresponding opening bracket on the parser's stack,
841     /// then an error is returned.
842     #[inline(never)]
pop_class( &self, nested_union: ast::ClassSetUnion, ) -> Result<Either<ast::ClassSetUnion, ast::Class>>843     fn pop_class(
844         &self,
845         nested_union: ast::ClassSetUnion,
846     ) -> Result<Either<ast::ClassSetUnion, ast::Class>> {
847         assert_eq!(self.char(), ']');
848 
849         let item = ast::ClassSet::Item(nested_union.into_item());
850         let prevset = self.pop_class_op(item);
851         let mut stack = self.parser().stack_class.borrow_mut();
852         match stack.pop() {
853             None => {
854                 // We can never observe an empty stack:
855                 //
856                 // 1) We are guaranteed to start with a non-empty stack since
857                 //    the character class parser is only initiated when it sees
858                 //    a `[`.
859                 // 2) If we ever observe an empty stack while popping after
860                 //    seeing a `]`, then we signal the character class parser
861                 //    to terminate.
862                 panic!("unexpected empty character class stack")
863             }
864             Some(ClassState::Op { .. }) => {
865                 // This panic is unfortunate, but this case is impossible
866                 // since we already popped the Op state if one exists above.
867                 // Namely, every push to the class parser stack is guarded by
868                 // whether an existing Op is already on the top of the stack.
869                 // If it is, the existing Op is modified. That is, the stack
870                 // can never have consecutive Op states.
871                 panic!("unexpected ClassState::Op")
872             }
873             Some(ClassState::Open { mut union, mut set }) => {
874                 self.bump();
875                 set.span.end = self.pos();
876                 set.kind = prevset;
877                 if stack.is_empty() {
878                     Ok(Either::Right(ast::Class::Bracketed(set)))
879                 } else {
880                     union.push(ast::ClassSetItem::Bracketed(Box::new(set)));
881                     Ok(Either::Left(union))
882                 }
883             }
884         }
885     }
886 
887     /// Return an "unclosed class" error whose span points to the most
888     /// recently opened class.
889     ///
890     /// This should only be called while parsing a character class.
891     #[inline(never)]
unclosed_class_error(&self) -> ast::Error892     fn unclosed_class_error(&self) -> ast::Error {
893         for state in self.parser().stack_class.borrow().iter().rev() {
894             if let ClassState::Open { ref set, .. } = *state {
895                 return self.error(set.span, ast::ErrorKind::ClassUnclosed);
896             }
897         }
898         // We are guaranteed to have a non-empty stack with at least
899         // one open bracket, so we should never get here.
900         panic!("no open character class found")
901     }
902 
903     /// Push the current set of class items on to the class parser's stack as
904     /// the left hand side of the given operator.
905     ///
906     /// A fresh set union is returned, which should be used to build the right
907     /// hand side of this operator.
908     #[inline(never)]
push_class_op( &self, next_kind: ast::ClassSetBinaryOpKind, next_union: ast::ClassSetUnion, ) -> ast::ClassSetUnion909     fn push_class_op(
910         &self,
911         next_kind: ast::ClassSetBinaryOpKind,
912         next_union: ast::ClassSetUnion,
913     ) -> ast::ClassSetUnion {
914         let item = ast::ClassSet::Item(next_union.into_item());
915         let new_lhs = self.pop_class_op(item);
916         self.parser()
917             .stack_class
918             .borrow_mut()
919             .push(ClassState::Op { kind: next_kind, lhs: new_lhs });
920         ast::ClassSetUnion { span: self.span(), items: vec![] }
921     }
922 
923     /// Pop a character class set from the character class parser stack. If the
924     /// top of the stack is just an item (not an operation), then return the
925     /// given set unchanged. If the top of the stack is an operation, then the
926     /// given set will be used as the rhs of the operation on the top of the
927     /// stack. In that case, the binary operation is returned as a set.
928     #[inline(never)]
pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet929     fn pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet {
930         let mut stack = self.parser().stack_class.borrow_mut();
931         let (kind, lhs) = match stack.pop() {
932             Some(ClassState::Op { kind, lhs }) => (kind, lhs),
933             Some(state @ ClassState::Open { .. }) => {
934                 stack.push(state);
935                 return rhs;
936             }
937             None => unreachable!(),
938         };
939         let span = Span::new(lhs.span().start, rhs.span().end);
940         ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
941             span,
942             kind,
943             lhs: Box::new(lhs),
944             rhs: Box::new(rhs),
945         })
946     }
947 }
948 
949 impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
950     /// Parse the regular expression into an abstract syntax tree.
parse(&self) -> Result<Ast>951     fn parse(&self) -> Result<Ast> {
952         self.parse_with_comments().map(|astc| astc.ast)
953     }
954 
955     /// Parse the regular expression and return an abstract syntax tree with
956     /// all of the comments found in the pattern.
parse_with_comments(&self) -> Result<ast::WithComments>957     fn parse_with_comments(&self) -> Result<ast::WithComments> {
958         assert_eq!(self.offset(), 0, "parser can only be used once");
959         self.parser().reset();
960         let mut concat = ast::Concat { span: self.span(), asts: vec![] };
961         loop {
962             self.bump_space();
963             if self.is_eof() {
964                 break;
965             }
966             match self.char() {
967                 '(' => concat = self.push_group(concat)?,
968                 ')' => concat = self.pop_group(concat)?,
969                 '|' => concat = self.push_alternate(concat)?,
970                 '[' => {
971                     let class = self.parse_set_class()?;
972                     concat.asts.push(Ast::Class(class));
973                 }
974                 '?' => {
975                     concat = self.parse_uncounted_repetition(
976                         concat,
977                         ast::RepetitionKind::ZeroOrOne,
978                     )?;
979                 }
980                 '*' => {
981                     concat = self.parse_uncounted_repetition(
982                         concat,
983                         ast::RepetitionKind::ZeroOrMore,
984                     )?;
985                 }
986                 '+' => {
987                     concat = self.parse_uncounted_repetition(
988                         concat,
989                         ast::RepetitionKind::OneOrMore,
990                     )?;
991                 }
992                 '{' => {
993                     concat = self.parse_counted_repetition(concat)?;
994                 }
995                 _ => concat.asts.push(self.parse_primitive()?.into_ast()),
996             }
997         }
998         let ast = self.pop_group_end(concat)?;
999         NestLimiter::new(self).check(&ast)?;
1000         Ok(ast::WithComments {
1001             ast,
1002             comments: mem::replace(
1003                 &mut *self.parser().comments.borrow_mut(),
1004                 vec![],
1005             ),
1006         })
1007     }
1008 
1009     /// Parses an uncounted repetition operation. An uncounted repetition
1010     /// operator includes ?, * and +, but does not include the {m,n} syntax.
1011     /// The given `kind` should correspond to the operator observed by the
1012     /// caller.
1013     ///
1014     /// This assumes that the parser is currently positioned at the repetition
1015     /// operator and advances the parser to the first character after the
1016     /// operator. (Note that the operator may include a single additional `?`,
1017     /// which makes the operator ungreedy.)
1018     ///
1019     /// The caller should include the concatenation that is being built. The
1020     /// concatenation returned includes the repetition operator applied to the
1021     /// last expression in the given concatenation.
1022     #[inline(never)]
parse_uncounted_repetition( &self, mut concat: ast::Concat, kind: ast::RepetitionKind, ) -> Result<ast::Concat>1023     fn parse_uncounted_repetition(
1024         &self,
1025         mut concat: ast::Concat,
1026         kind: ast::RepetitionKind,
1027     ) -> Result<ast::Concat> {
1028         assert!(
1029             self.char() == '?' || self.char() == '*' || self.char() == '+'
1030         );
1031         let op_start = self.pos();
1032         let ast = match concat.asts.pop() {
1033             Some(ast) => ast,
1034             None => {
1035                 return Err(
1036                     self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1037                 )
1038             }
1039         };
1040         match ast {
1041             Ast::Empty(_) | Ast::Flags(_) => {
1042                 return Err(
1043                     self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1044                 )
1045             }
1046             _ => {}
1047         }
1048         let mut greedy = true;
1049         if self.bump() && self.char() == '?' {
1050             greedy = false;
1051             self.bump();
1052         }
1053         concat.asts.push(Ast::Repetition(ast::Repetition {
1054             span: ast.span().with_end(self.pos()),
1055             op: ast::RepetitionOp {
1056                 span: Span::new(op_start, self.pos()),
1057                 kind,
1058             },
1059             greedy,
1060             ast: Box::new(ast),
1061         }));
1062         Ok(concat)
1063     }
1064 
1065     /// Parses a counted repetition operation. A counted repetition operator
1066     /// corresponds to the {m,n} syntax, and does not include the ?, * or +
1067     /// operators.
1068     ///
1069     /// This assumes that the parser is currently positioned at the opening `{`
1070     /// and advances the parser to the first character after the operator.
1071     /// (Note that the operator may include a single additional `?`, which
1072     /// makes the operator ungreedy.)
1073     ///
1074     /// The caller should include the concatenation that is being built. The
1075     /// concatenation returned includes the repetition operator applied to the
1076     /// last expression in the given concatenation.
1077     #[inline(never)]
parse_counted_repetition( &self, mut concat: ast::Concat, ) -> Result<ast::Concat>1078     fn parse_counted_repetition(
1079         &self,
1080         mut concat: ast::Concat,
1081     ) -> Result<ast::Concat> {
1082         assert!(self.char() == '{');
1083         let start = self.pos();
1084         let ast = match concat.asts.pop() {
1085             Some(ast) => ast,
1086             None => {
1087                 return Err(
1088                     self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1089                 )
1090             }
1091         };
1092         match ast {
1093             Ast::Empty(_) | Ast::Flags(_) => {
1094                 return Err(
1095                     self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1096                 )
1097             }
1098             _ => {}
1099         }
1100         if !self.bump_and_bump_space() {
1101             return Err(self.error(
1102                 Span::new(start, self.pos()),
1103                 ast::ErrorKind::RepetitionCountUnclosed,
1104             ));
1105         }
1106         let count_start = specialize_err(
1107             self.parse_decimal(),
1108             ast::ErrorKind::DecimalEmpty,
1109             ast::ErrorKind::RepetitionCountDecimalEmpty,
1110         )?;
1111         let mut range = ast::RepetitionRange::Exactly(count_start);
1112         if self.is_eof() {
1113             return Err(self.error(
1114                 Span::new(start, self.pos()),
1115                 ast::ErrorKind::RepetitionCountUnclosed,
1116             ));
1117         }
1118         if self.char() == ',' {
1119             if !self.bump_and_bump_space() {
1120                 return Err(self.error(
1121                     Span::new(start, self.pos()),
1122                     ast::ErrorKind::RepetitionCountUnclosed,
1123                 ));
1124             }
1125             if self.char() != '}' {
1126                 let count_end = specialize_err(
1127                     self.parse_decimal(),
1128                     ast::ErrorKind::DecimalEmpty,
1129                     ast::ErrorKind::RepetitionCountDecimalEmpty,
1130                 )?;
1131                 range = ast::RepetitionRange::Bounded(count_start, count_end);
1132             } else {
1133                 range = ast::RepetitionRange::AtLeast(count_start);
1134             }
1135         }
1136         if self.is_eof() || self.char() != '}' {
1137             return Err(self.error(
1138                 Span::new(start, self.pos()),
1139                 ast::ErrorKind::RepetitionCountUnclosed,
1140             ));
1141         }
1142 
1143         let mut greedy = true;
1144         if self.bump_and_bump_space() && self.char() == '?' {
1145             greedy = false;
1146             self.bump();
1147         }
1148 
1149         let op_span = Span::new(start, self.pos());
1150         if !range.is_valid() {
1151             return Err(
1152                 self.error(op_span, ast::ErrorKind::RepetitionCountInvalid)
1153             );
1154         }
1155         concat.asts.push(Ast::Repetition(ast::Repetition {
1156             span: ast.span().with_end(self.pos()),
1157             op: ast::RepetitionOp {
1158                 span: op_span,
1159                 kind: ast::RepetitionKind::Range(range),
1160             },
1161             greedy,
1162             ast: Box::new(ast),
1163         }));
1164         Ok(concat)
1165     }
1166 
1167     /// Parse a group (which contains a sub-expression) or a set of flags.
1168     ///
1169     /// If a group was found, then it is returned with an empty AST. If a set
1170     /// of flags is found, then that set is returned.
1171     ///
1172     /// The parser should be positioned at the opening parenthesis.
1173     ///
1174     /// This advances the parser to the character before the start of the
1175     /// sub-expression (in the case of a group) or to the closing parenthesis
1176     /// immediately following the set of flags.
1177     ///
1178     /// # Errors
1179     ///
1180     /// If flags are given and incorrectly specified, then a corresponding
1181     /// error is returned.
1182     ///
1183     /// If a capture name is given and it is incorrectly specified, then a
1184     /// corresponding error is returned.
1185     #[inline(never)]
parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>>1186     fn parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>> {
1187         assert_eq!(self.char(), '(');
1188         let open_span = self.span_char();
1189         self.bump();
1190         self.bump_space();
1191         if self.is_lookaround_prefix() {
1192             return Err(self.error(
1193                 Span::new(open_span.start, self.span().end),
1194                 ast::ErrorKind::UnsupportedLookAround,
1195             ));
1196         }
1197         let inner_span = self.span();
1198         if self.bump_if("?P<") {
1199             let capture_index = self.next_capture_index(open_span)?;
1200             let cap = self.parse_capture_name(capture_index)?;
1201             Ok(Either::Right(ast::Group {
1202                 span: open_span,
1203                 kind: ast::GroupKind::CaptureName(cap),
1204                 ast: Box::new(Ast::Empty(self.span())),
1205             }))
1206         } else if self.bump_if("?") {
1207             if self.is_eof() {
1208                 return Err(
1209                     self.error(open_span, ast::ErrorKind::GroupUnclosed)
1210                 );
1211             }
1212             let flags = self.parse_flags()?;
1213             let char_end = self.char();
1214             self.bump();
1215             if char_end == ')' {
1216                 // We don't allow empty flags, e.g., `(?)`. We instead
1217                 // interpret it as a repetition operator missing its argument.
1218                 if flags.items.is_empty() {
1219                     return Err(self.error(
1220                         inner_span,
1221                         ast::ErrorKind::RepetitionMissing,
1222                     ));
1223                 }
1224                 Ok(Either::Left(ast::SetFlags {
1225                     span: Span { end: self.pos(), ..open_span },
1226                     flags,
1227                 }))
1228             } else {
1229                 assert_eq!(char_end, ':');
1230                 Ok(Either::Right(ast::Group {
1231                     span: open_span,
1232                     kind: ast::GroupKind::NonCapturing(flags),
1233                     ast: Box::new(Ast::Empty(self.span())),
1234                 }))
1235             }
1236         } else {
1237             let capture_index = self.next_capture_index(open_span)?;
1238             Ok(Either::Right(ast::Group {
1239                 span: open_span,
1240                 kind: ast::GroupKind::CaptureIndex(capture_index),
1241                 ast: Box::new(Ast::Empty(self.span())),
1242             }))
1243         }
1244     }
1245 
1246     /// Parses a capture group name. Assumes that the parser is positioned at
1247     /// the first character in the name following the opening `<` (and may
1248     /// possibly be EOF). This advances the parser to the first character
1249     /// following the closing `>`.
1250     ///
1251     /// The caller must provide the capture index of the group for this name.
1252     #[inline(never)]
parse_capture_name( &self, capture_index: u32, ) -> Result<ast::CaptureName>1253     fn parse_capture_name(
1254         &self,
1255         capture_index: u32,
1256     ) -> Result<ast::CaptureName> {
1257         if self.is_eof() {
1258             return Err(self
1259                 .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1260         }
1261         let start = self.pos();
1262         loop {
1263             if self.char() == '>' {
1264                 break;
1265             }
1266             if !is_capture_char(self.char(), self.pos() == start) {
1267                 return Err(self.error(
1268                     self.span_char(),
1269                     ast::ErrorKind::GroupNameInvalid,
1270                 ));
1271             }
1272             if !self.bump() {
1273                 break;
1274             }
1275         }
1276         let end = self.pos();
1277         if self.is_eof() {
1278             return Err(self
1279                 .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1280         }
1281         assert_eq!(self.char(), '>');
1282         self.bump();
1283         let name = &self.pattern()[start.offset..end.offset];
1284         if name.is_empty() {
1285             return Err(self.error(
1286                 Span::new(start, start),
1287                 ast::ErrorKind::GroupNameEmpty,
1288             ));
1289         }
1290         let capname = ast::CaptureName {
1291             span: Span::new(start, end),
1292             name: name.to_string(),
1293             index: capture_index,
1294         };
1295         self.add_capture_name(&capname)?;
1296         Ok(capname)
1297     }
1298 
1299     /// Parse a sequence of flags starting at the current character.
1300     ///
1301     /// This advances the parser to the character immediately following the
1302     /// flags, which is guaranteed to be either `:` or `)`.
1303     ///
1304     /// # Errors
1305     ///
1306     /// If any flags are duplicated, then an error is returned.
1307     ///
1308     /// If the negation operator is used more than once, then an error is
1309     /// returned.
1310     ///
1311     /// If no flags could be found or if the negation operation is not followed
1312     /// by any flags, then an error is returned.
1313     #[inline(never)]
parse_flags(&self) -> Result<ast::Flags>1314     fn parse_flags(&self) -> Result<ast::Flags> {
1315         let mut flags = ast::Flags { span: self.span(), items: vec![] };
1316         let mut last_was_negation = None;
1317         while self.char() != ':' && self.char() != ')' {
1318             if self.char() == '-' {
1319                 last_was_negation = Some(self.span_char());
1320                 let item = ast::FlagsItem {
1321                     span: self.span_char(),
1322                     kind: ast::FlagsItemKind::Negation,
1323                 };
1324                 if let Some(i) = flags.add_item(item) {
1325                     return Err(self.error(
1326                         self.span_char(),
1327                         ast::ErrorKind::FlagRepeatedNegation {
1328                             original: flags.items[i].span,
1329                         },
1330                     ));
1331                 }
1332             } else {
1333                 last_was_negation = None;
1334                 let item = ast::FlagsItem {
1335                     span: self.span_char(),
1336                     kind: ast::FlagsItemKind::Flag(self.parse_flag()?),
1337                 };
1338                 if let Some(i) = flags.add_item(item) {
1339                     return Err(self.error(
1340                         self.span_char(),
1341                         ast::ErrorKind::FlagDuplicate {
1342                             original: flags.items[i].span,
1343                         },
1344                     ));
1345                 }
1346             }
1347             if !self.bump() {
1348                 return Err(
1349                     self.error(self.span(), ast::ErrorKind::FlagUnexpectedEof)
1350                 );
1351             }
1352         }
1353         if let Some(span) = last_was_negation {
1354             return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation));
1355         }
1356         flags.span.end = self.pos();
1357         Ok(flags)
1358     }
1359 
1360     /// Parse the current character as a flag. Do not advance the parser.
1361     ///
1362     /// # Errors
1363     ///
1364     /// If the flag is not recognized, then an error is returned.
1365     #[inline(never)]
parse_flag(&self) -> Result<ast::Flag>1366     fn parse_flag(&self) -> Result<ast::Flag> {
1367         match self.char() {
1368             'i' => Ok(ast::Flag::CaseInsensitive),
1369             'm' => Ok(ast::Flag::MultiLine),
1370             's' => Ok(ast::Flag::DotMatchesNewLine),
1371             'U' => Ok(ast::Flag::SwapGreed),
1372             'u' => Ok(ast::Flag::Unicode),
1373             'x' => Ok(ast::Flag::IgnoreWhitespace),
1374             _ => {
1375                 Err(self
1376                     .error(self.span_char(), ast::ErrorKind::FlagUnrecognized))
1377             }
1378         }
1379     }
1380 
1381     /// Parse a primitive AST. e.g., A literal, non-set character class or
1382     /// assertion.
1383     ///
1384     /// This assumes that the parser expects a primitive at the current
1385     /// location. i.e., All other non-primitive cases have been handled.
1386     /// For example, if the parser's position is at `|`, then `|` will be
1387     /// treated as a literal (e.g., inside a character class).
1388     ///
1389     /// This advances the parser to the first character immediately following
1390     /// the primitive.
parse_primitive(&self) -> Result<Primitive>1391     fn parse_primitive(&self) -> Result<Primitive> {
1392         match self.char() {
1393             '\\' => self.parse_escape(),
1394             '.' => {
1395                 let ast = Primitive::Dot(self.span_char());
1396                 self.bump();
1397                 Ok(ast)
1398             }
1399             '^' => {
1400                 let ast = Primitive::Assertion(ast::Assertion {
1401                     span: self.span_char(),
1402                     kind: ast::AssertionKind::StartLine,
1403                 });
1404                 self.bump();
1405                 Ok(ast)
1406             }
1407             '$' => {
1408                 let ast = Primitive::Assertion(ast::Assertion {
1409                     span: self.span_char(),
1410                     kind: ast::AssertionKind::EndLine,
1411                 });
1412                 self.bump();
1413                 Ok(ast)
1414             }
1415             c => {
1416                 let ast = Primitive::Literal(ast::Literal {
1417                     span: self.span_char(),
1418                     kind: ast::LiteralKind::Verbatim,
1419                     c,
1420                 });
1421                 self.bump();
1422                 Ok(ast)
1423             }
1424         }
1425     }
1426 
1427     /// Parse an escape sequence as a primitive AST.
1428     ///
1429     /// This assumes the parser is positioned at the start of the escape
1430     /// sequence, i.e., `\`. It advances the parser to the first position
1431     /// immediately following the escape sequence.
1432     #[inline(never)]
parse_escape(&self) -> Result<Primitive>1433     fn parse_escape(&self) -> Result<Primitive> {
1434         assert_eq!(self.char(), '\\');
1435         let start = self.pos();
1436         if !self.bump() {
1437             return Err(self.error(
1438                 Span::new(start, self.pos()),
1439                 ast::ErrorKind::EscapeUnexpectedEof,
1440             ));
1441         }
1442         let c = self.char();
1443         // Put some of the more complicated routines into helpers.
1444         match c {
1445             '0'..='7' => {
1446                 if !self.parser().octal {
1447                     return Err(self.error(
1448                         Span::new(start, self.span_char().end),
1449                         ast::ErrorKind::UnsupportedBackreference,
1450                     ));
1451                 }
1452                 let mut lit = self.parse_octal();
1453                 lit.span.start = start;
1454                 return Ok(Primitive::Literal(lit));
1455             }
1456             '8'..='9' if !self.parser().octal => {
1457                 return Err(self.error(
1458                     Span::new(start, self.span_char().end),
1459                     ast::ErrorKind::UnsupportedBackreference,
1460                 ));
1461             }
1462             'x' | 'u' | 'U' => {
1463                 let mut lit = self.parse_hex()?;
1464                 lit.span.start = start;
1465                 return Ok(Primitive::Literal(lit));
1466             }
1467             'p' | 'P' => {
1468                 let mut cls = self.parse_unicode_class()?;
1469                 cls.span.start = start;
1470                 return Ok(Primitive::Unicode(cls));
1471             }
1472             'd' | 's' | 'w' | 'D' | 'S' | 'W' => {
1473                 let mut cls = self.parse_perl_class();
1474                 cls.span.start = start;
1475                 return Ok(Primitive::Perl(cls));
1476             }
1477             _ => {}
1478         }
1479 
1480         // Handle all of the one letter sequences inline.
1481         self.bump();
1482         let span = Span::new(start, self.pos());
1483         if is_meta_character(c) {
1484             return Ok(Primitive::Literal(ast::Literal {
1485                 span,
1486                 kind: ast::LiteralKind::Punctuation,
1487                 c,
1488             }));
1489         }
1490         let special = |kind, c| {
1491             Ok(Primitive::Literal(ast::Literal {
1492                 span,
1493                 kind: ast::LiteralKind::Special(kind),
1494                 c,
1495             }))
1496         };
1497         match c {
1498             'a' => special(ast::SpecialLiteralKind::Bell, '\x07'),
1499             'f' => special(ast::SpecialLiteralKind::FormFeed, '\x0C'),
1500             't' => special(ast::SpecialLiteralKind::Tab, '\t'),
1501             'n' => special(ast::SpecialLiteralKind::LineFeed, '\n'),
1502             'r' => special(ast::SpecialLiteralKind::CarriageReturn, '\r'),
1503             'v' => special(ast::SpecialLiteralKind::VerticalTab, '\x0B'),
1504             ' ' if self.ignore_whitespace() => {
1505                 special(ast::SpecialLiteralKind::Space, ' ')
1506             }
1507             'A' => Ok(Primitive::Assertion(ast::Assertion {
1508                 span,
1509                 kind: ast::AssertionKind::StartText,
1510             })),
1511             'z' => Ok(Primitive::Assertion(ast::Assertion {
1512                 span,
1513                 kind: ast::AssertionKind::EndText,
1514             })),
1515             'b' => Ok(Primitive::Assertion(ast::Assertion {
1516                 span,
1517                 kind: ast::AssertionKind::WordBoundary,
1518             })),
1519             'B' => Ok(Primitive::Assertion(ast::Assertion {
1520                 span,
1521                 kind: ast::AssertionKind::NotWordBoundary,
1522             })),
1523             _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
1524         }
1525     }
1526 
1527     /// Parse an octal representation of a Unicode codepoint up to 3 digits
1528     /// long. This expects the parser to be positioned at the first octal
1529     /// digit and advances the parser to the first character immediately
1530     /// following the octal number. This also assumes that parsing octal
1531     /// escapes is enabled.
1532     ///
1533     /// Assuming the preconditions are met, this routine can never fail.
1534     #[inline(never)]
parse_octal(&self) -> ast::Literal1535     fn parse_octal(&self) -> ast::Literal {
1536         use std::char;
1537         use std::u32;
1538 
1539         assert!(self.parser().octal);
1540         assert!('0' <= self.char() && self.char() <= '7');
1541         let start = self.pos();
1542         // Parse up to two more digits.
1543         while self.bump()
1544             && '0' <= self.char()
1545             && self.char() <= '7'
1546             && self.pos().offset - start.offset <= 2
1547         {}
1548         let end = self.pos();
1549         let octal = &self.pattern()[start.offset..end.offset];
1550         // Parsing the octal should never fail since the above guarantees a
1551         // valid number.
1552         let codepoint =
1553             u32::from_str_radix(octal, 8).expect("valid octal number");
1554         // The max value for 3 digit octal is 0777 = 511 and [0, 511] has no
1555         // invalid Unicode scalar values.
1556         let c = char::from_u32(codepoint).expect("Unicode scalar value");
1557         ast::Literal {
1558             span: Span::new(start, end),
1559             kind: ast::LiteralKind::Octal,
1560             c,
1561         }
1562     }
1563 
1564     /// Parse a hex representation of a Unicode codepoint. This handles both
1565     /// hex notations, i.e., `\xFF` and `\x{FFFF}`. This expects the parser to
1566     /// be positioned at the `x`, `u` or `U` prefix. The parser is advanced to
1567     /// the first character immediately following the hexadecimal literal.
1568     #[inline(never)]
parse_hex(&self) -> Result<ast::Literal>1569     fn parse_hex(&self) -> Result<ast::Literal> {
1570         assert!(
1571             self.char() == 'x' || self.char() == 'u' || self.char() == 'U'
1572         );
1573 
1574         let hex_kind = match self.char() {
1575             'x' => ast::HexLiteralKind::X,
1576             'u' => ast::HexLiteralKind::UnicodeShort,
1577             _ => ast::HexLiteralKind::UnicodeLong,
1578         };
1579         if !self.bump_and_bump_space() {
1580             return Err(
1581                 self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
1582             );
1583         }
1584         if self.char() == '{' {
1585             self.parse_hex_brace(hex_kind)
1586         } else {
1587             self.parse_hex_digits(hex_kind)
1588         }
1589     }
1590 
1591     /// Parse an N-digit hex representation of a Unicode codepoint. This
1592     /// expects the parser to be positioned at the first digit and will advance
1593     /// the parser to the first character immediately following the escape
1594     /// sequence.
1595     ///
1596     /// The number of digits given must be 2 (for `\xNN`), 4 (for `\uNNNN`)
1597     /// or 8 (for `\UNNNNNNNN`).
1598     #[inline(never)]
parse_hex_digits( &self, kind: ast::HexLiteralKind, ) -> Result<ast::Literal>1599     fn parse_hex_digits(
1600         &self,
1601         kind: ast::HexLiteralKind,
1602     ) -> Result<ast::Literal> {
1603         use std::char;
1604         use std::u32;
1605 
1606         let mut scratch = self.parser().scratch.borrow_mut();
1607         scratch.clear();
1608 
1609         let start = self.pos();
1610         for i in 0..kind.digits() {
1611             if i > 0 && !self.bump_and_bump_space() {
1612                 return Err(self
1613                     .error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
1614             }
1615             if !is_hex(self.char()) {
1616                 return Err(self.error(
1617                     self.span_char(),
1618                     ast::ErrorKind::EscapeHexInvalidDigit,
1619                 ));
1620             }
1621             scratch.push(self.char());
1622         }
1623         // The final bump just moves the parser past the literal, which may
1624         // be EOF.
1625         self.bump_and_bump_space();
1626         let end = self.pos();
1627         let hex = scratch.as_str();
1628         match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
1629             None => Err(self.error(
1630                 Span::new(start, end),
1631                 ast::ErrorKind::EscapeHexInvalid,
1632             )),
1633             Some(c) => Ok(ast::Literal {
1634                 span: Span::new(start, end),
1635                 kind: ast::LiteralKind::HexFixed(kind),
1636                 c,
1637             }),
1638         }
1639     }
1640 
1641     /// Parse a hex representation of any Unicode scalar value. This expects
1642     /// the parser to be positioned at the opening brace `{` and will advance
1643     /// the parser to the first character following the closing brace `}`.
1644     #[inline(never)]
parse_hex_brace( &self, kind: ast::HexLiteralKind, ) -> Result<ast::Literal>1645     fn parse_hex_brace(
1646         &self,
1647         kind: ast::HexLiteralKind,
1648     ) -> Result<ast::Literal> {
1649         use std::char;
1650         use std::u32;
1651 
1652         let mut scratch = self.parser().scratch.borrow_mut();
1653         scratch.clear();
1654 
1655         let brace_pos = self.pos();
1656         let start = self.span_char().end;
1657         while self.bump_and_bump_space() && self.char() != '}' {
1658             if !is_hex(self.char()) {
1659                 return Err(self.error(
1660                     self.span_char(),
1661                     ast::ErrorKind::EscapeHexInvalidDigit,
1662                 ));
1663             }
1664             scratch.push(self.char());
1665         }
1666         if self.is_eof() {
1667             return Err(self.error(
1668                 Span::new(brace_pos, self.pos()),
1669                 ast::ErrorKind::EscapeUnexpectedEof,
1670             ));
1671         }
1672         let end = self.pos();
1673         let hex = scratch.as_str();
1674         assert_eq!(self.char(), '}');
1675         self.bump_and_bump_space();
1676 
1677         if hex.is_empty() {
1678             return Err(self.error(
1679                 Span::new(brace_pos, self.pos()),
1680                 ast::ErrorKind::EscapeHexEmpty,
1681             ));
1682         }
1683         match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
1684             None => Err(self.error(
1685                 Span::new(start, end),
1686                 ast::ErrorKind::EscapeHexInvalid,
1687             )),
1688             Some(c) => Ok(ast::Literal {
1689                 span: Span::new(start, self.pos()),
1690                 kind: ast::LiteralKind::HexBrace(kind),
1691                 c,
1692             }),
1693         }
1694     }
1695 
1696     /// Parse a decimal number into a u32 while trimming leading and trailing
1697     /// whitespace.
1698     ///
1699     /// This expects the parser to be positioned at the first position where
1700     /// a decimal digit could occur. This will advance the parser to the byte
1701     /// immediately following the last contiguous decimal digit.
1702     ///
1703     /// If no decimal digit could be found or if there was a problem parsing
1704     /// the complete set of digits into a u32, then an error is returned.
parse_decimal(&self) -> Result<u32>1705     fn parse_decimal(&self) -> Result<u32> {
1706         let mut scratch = self.parser().scratch.borrow_mut();
1707         scratch.clear();
1708 
1709         while !self.is_eof() && self.char().is_whitespace() {
1710             self.bump();
1711         }
1712         let start = self.pos();
1713         while !self.is_eof() && '0' <= self.char() && self.char() <= '9' {
1714             scratch.push(self.char());
1715             self.bump_and_bump_space();
1716         }
1717         let span = Span::new(start, self.pos());
1718         while !self.is_eof() && self.char().is_whitespace() {
1719             self.bump_and_bump_space();
1720         }
1721         let digits = scratch.as_str();
1722         if digits.is_empty() {
1723             return Err(self.error(span, ast::ErrorKind::DecimalEmpty));
1724         }
1725         match u32::from_str_radix(digits, 10).ok() {
1726             Some(n) => Ok(n),
1727             None => Err(self.error(span, ast::ErrorKind::DecimalInvalid)),
1728         }
1729     }
1730 
1731     /// Parse a standard character class consisting primarily of characters or
1732     /// character ranges, but can also contain nested character classes of
1733     /// any type (sans `.`).
1734     ///
1735     /// This assumes the parser is positioned at the opening `[`. If parsing
1736     /// is successful, then the parser is advanced to the position immediately
1737     /// following the closing `]`.
1738     #[inline(never)]
parse_set_class(&self) -> Result<ast::Class>1739     fn parse_set_class(&self) -> Result<ast::Class> {
1740         assert_eq!(self.char(), '[');
1741 
1742         let mut union =
1743             ast::ClassSetUnion { span: self.span(), items: vec![] };
1744         loop {
1745             self.bump_space();
1746             if self.is_eof() {
1747                 return Err(self.unclosed_class_error());
1748             }
1749             match self.char() {
1750                 '[' => {
1751                     // If we've already parsed the opening bracket, then
1752                     // attempt to treat this as the beginning of an ASCII
1753                     // class. If ASCII class parsing fails, then the parser
1754                     // backs up to `[`.
1755                     if !self.parser().stack_class.borrow().is_empty() {
1756                         if let Some(cls) = self.maybe_parse_ascii_class() {
1757                             union.push(ast::ClassSetItem::Ascii(cls));
1758                             continue;
1759                         }
1760                     }
1761                     union = self.push_class_open(union)?;
1762                 }
1763                 ']' => match self.pop_class(union)? {
1764                     Either::Left(nested_union) => {
1765                         union = nested_union;
1766                     }
1767                     Either::Right(class) => return Ok(class),
1768                 },
1769                 '&' if self.peek() == Some('&') => {
1770                     assert!(self.bump_if("&&"));
1771                     union = self.push_class_op(
1772                         ast::ClassSetBinaryOpKind::Intersection,
1773                         union,
1774                     );
1775                 }
1776                 '-' if self.peek() == Some('-') => {
1777                     assert!(self.bump_if("--"));
1778                     union = self.push_class_op(
1779                         ast::ClassSetBinaryOpKind::Difference,
1780                         union,
1781                     );
1782                 }
1783                 '~' if self.peek() == Some('~') => {
1784                     assert!(self.bump_if("~~"));
1785                     union = self.push_class_op(
1786                         ast::ClassSetBinaryOpKind::SymmetricDifference,
1787                         union,
1788                     );
1789                 }
1790                 _ => {
1791                     union.push(self.parse_set_class_range()?);
1792                 }
1793             }
1794         }
1795     }
1796 
1797     /// Parse a single primitive item in a character class set. The item to
1798     /// be parsed can either be one of a simple literal character, a range
1799     /// between two simple literal characters or a "primitive" character
1800     /// class like \w or \p{Greek}.
1801     ///
1802     /// If an invalid escape is found, or if a character class is found where
1803     /// a simple literal is expected (e.g., in a range), then an error is
1804     /// returned.
1805     #[inline(never)]
parse_set_class_range(&self) -> Result<ast::ClassSetItem>1806     fn parse_set_class_range(&self) -> Result<ast::ClassSetItem> {
1807         let prim1 = self.parse_set_class_item()?;
1808         self.bump_space();
1809         if self.is_eof() {
1810             return Err(self.unclosed_class_error());
1811         }
1812         // If the next char isn't a `-`, then we don't have a range.
1813         // There are two exceptions. If the char after a `-` is a `]`, then
1814         // `-` is interpreted as a literal `-`. Alternatively, if the char
1815         // after a `-` is a `-`, then `--` corresponds to a "difference"
1816         // operation.
1817         if self.char() != '-'
1818             || self.peek_space() == Some(']')
1819             || self.peek_space() == Some('-')
1820         {
1821             return prim1.into_class_set_item(self);
1822         }
1823         // OK, now we're parsing a range, so bump past the `-` and parse the
1824         // second half of the range.
1825         if !self.bump_and_bump_space() {
1826             return Err(self.unclosed_class_error());
1827         }
1828         let prim2 = self.parse_set_class_item()?;
1829         let range = ast::ClassSetRange {
1830             span: Span::new(prim1.span().start, prim2.span().end),
1831             start: prim1.into_class_literal(self)?,
1832             end: prim2.into_class_literal(self)?,
1833         };
1834         if !range.is_valid() {
1835             return Err(
1836                 self.error(range.span, ast::ErrorKind::ClassRangeInvalid)
1837             );
1838         }
1839         Ok(ast::ClassSetItem::Range(range))
1840     }
1841 
1842     /// Parse a single item in a character class as a primitive, where the
1843     /// primitive either consists of a verbatim literal or a single escape
1844     /// sequence.
1845     ///
1846     /// This assumes the parser is positioned at the beginning of a primitive,
1847     /// and advances the parser to the first position after the primitive if
1848     /// successful.
1849     ///
1850     /// Note that it is the caller's responsibility to report an error if an
1851     /// illegal primitive was parsed.
1852     #[inline(never)]
parse_set_class_item(&self) -> Result<Primitive>1853     fn parse_set_class_item(&self) -> Result<Primitive> {
1854         if self.char() == '\\' {
1855             self.parse_escape()
1856         } else {
1857             let x = Primitive::Literal(ast::Literal {
1858                 span: self.span_char(),
1859                 kind: ast::LiteralKind::Verbatim,
1860                 c: self.char(),
1861             });
1862             self.bump();
1863             Ok(x)
1864         }
1865     }
1866 
1867     /// Parses the opening of a character class set. This includes the opening
1868     /// bracket along with `^` if present to indicate negation. This also
1869     /// starts parsing the opening set of unioned items if applicable, since
1870     /// there are special rules applied to certain characters in the opening
1871     /// of a character class. For example, `[^]]` is the class of all
1872     /// characters not equal to `]`. (`]` would need to be escaped in any other
1873     /// position.) Similarly for `-`.
1874     ///
1875     /// In all cases, the op inside the returned `ast::ClassBracketed` is an
1876     /// empty union. This empty union should be replaced with the actual item
1877     /// when it is popped from the parser's stack.
1878     ///
1879     /// This assumes the parser is positioned at the opening `[` and advances
1880     /// the parser to the first non-special byte of the character class.
1881     ///
1882     /// An error is returned if EOF is found.
1883     #[inline(never)]
parse_set_class_open( &self, ) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)>1884     fn parse_set_class_open(
1885         &self,
1886     ) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)> {
1887         assert_eq!(self.char(), '[');
1888         let start = self.pos();
1889         if !self.bump_and_bump_space() {
1890             return Err(self.error(
1891                 Span::new(start, self.pos()),
1892                 ast::ErrorKind::ClassUnclosed,
1893             ));
1894         }
1895 
1896         let negated = if self.char() != '^' {
1897             false
1898         } else {
1899             if !self.bump_and_bump_space() {
1900                 return Err(self.error(
1901                     Span::new(start, self.pos()),
1902                     ast::ErrorKind::ClassUnclosed,
1903                 ));
1904             }
1905             true
1906         };
1907         // Accept any number of `-` as literal `-`.
1908         let mut union =
1909             ast::ClassSetUnion { span: self.span(), items: vec![] };
1910         while self.char() == '-' {
1911             union.push(ast::ClassSetItem::Literal(ast::Literal {
1912                 span: self.span_char(),
1913                 kind: ast::LiteralKind::Verbatim,
1914                 c: '-',
1915             }));
1916             if !self.bump_and_bump_space() {
1917                 return Err(self.error(
1918                     Span::new(start, start),
1919                     ast::ErrorKind::ClassUnclosed,
1920                 ));
1921             }
1922         }
1923         // If `]` is the *first* char in a set, then interpret it as a literal
1924         // `]`. That is, an empty class is impossible to write.
1925         if union.items.is_empty() && self.char() == ']' {
1926             union.push(ast::ClassSetItem::Literal(ast::Literal {
1927                 span: self.span_char(),
1928                 kind: ast::LiteralKind::Verbatim,
1929                 c: ']',
1930             }));
1931             if !self.bump_and_bump_space() {
1932                 return Err(self.error(
1933                     Span::new(start, self.pos()),
1934                     ast::ErrorKind::ClassUnclosed,
1935                 ));
1936             }
1937         }
1938         let set = ast::ClassBracketed {
1939             span: Span::new(start, self.pos()),
1940             negated,
1941             kind: ast::ClassSet::union(ast::ClassSetUnion {
1942                 span: Span::new(union.span.start, union.span.start),
1943                 items: vec![],
1944             }),
1945         };
1946         Ok((set, union))
1947     }
1948 
1949     /// Attempt to parse an ASCII character class, e.g., `[:alnum:]`.
1950     ///
1951     /// This assumes the parser is positioned at the opening `[`.
1952     ///
1953     /// If no valid ASCII character class could be found, then this does not
1954     /// advance the parser and `None` is returned. Otherwise, the parser is
1955     /// advanced to the first byte following the closing `]` and the
1956     /// corresponding ASCII class is returned.
1957     #[inline(never)]
maybe_parse_ascii_class(&self) -> Option<ast::ClassAscii>1958     fn maybe_parse_ascii_class(&self) -> Option<ast::ClassAscii> {
1959         // ASCII character classes are interesting from a parsing perspective
1960         // because parsing cannot fail with any interesting error. For example,
1961         // in order to use an ASCII character class, it must be enclosed in
1962         // double brackets, e.g., `[[:alnum:]]`. Alternatively, you might think
1963         // of it as "ASCII character characters have the syntax `[:NAME:]`
1964         // which can only appear within character brackets." This means that
1965         // things like `[[:lower:]A]` are legal constructs.
1966         //
1967         // However, if one types an incorrect ASCII character class, e.g.,
1968         // `[[:loower:]]`, then we treat that as a normal nested character
1969         // class containing the characters `:elorw`. One might argue that we
1970         // should return an error instead since the repeated colons give away
1971         // the intent to write an ASCII class. But what if the user typed
1972         // `[[:lower]]` instead? How can we tell that was intended to be an
1973         // ASCII class and not just a normal nested class?
1974         //
1975         // Reasonable people can probably disagree over this, but for better
1976         // or worse, we implement semantics that never fails at the expense
1977         // of better failure modes.
1978         assert_eq!(self.char(), '[');
1979         // If parsing fails, then we back up the parser to this starting point.
1980         let start = self.pos();
1981         let mut negated = false;
1982         if !self.bump() || self.char() != ':' {
1983             self.parser().pos.set(start);
1984             return None;
1985         }
1986         if !self.bump() {
1987             self.parser().pos.set(start);
1988             return None;
1989         }
1990         if self.char() == '^' {
1991             negated = true;
1992             if !self.bump() {
1993                 self.parser().pos.set(start);
1994                 return None;
1995             }
1996         }
1997         let name_start = self.offset();
1998         while self.char() != ':' && self.bump() {}
1999         if self.is_eof() {
2000             self.parser().pos.set(start);
2001             return None;
2002         }
2003         let name = &self.pattern()[name_start..self.offset()];
2004         if !self.bump_if(":]") {
2005             self.parser().pos.set(start);
2006             return None;
2007         }
2008         let kind = match ast::ClassAsciiKind::from_name(name) {
2009             Some(kind) => kind,
2010             None => {
2011                 self.parser().pos.set(start);
2012                 return None;
2013             }
2014         };
2015         Some(ast::ClassAscii {
2016             span: Span::new(start, self.pos()),
2017             kind,
2018             negated,
2019         })
2020     }
2021 
2022     /// Parse a Unicode class in either the single character notation, `\pN`
2023     /// or the multi-character bracketed notation, `\p{Greek}`. This assumes
2024     /// the parser is positioned at the `p` (or `P` for negation) and will
2025     /// advance the parser to the character immediately following the class.
2026     ///
2027     /// Note that this does not check whether the class name is valid or not.
2028     #[inline(never)]
parse_unicode_class(&self) -> Result<ast::ClassUnicode>2029     fn parse_unicode_class(&self) -> Result<ast::ClassUnicode> {
2030         assert!(self.char() == 'p' || self.char() == 'P');
2031 
2032         let mut scratch = self.parser().scratch.borrow_mut();
2033         scratch.clear();
2034 
2035         let negated = self.char() == 'P';
2036         if !self.bump_and_bump_space() {
2037             return Err(
2038                 self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
2039             );
2040         }
2041         let (start, kind) = if self.char() == '{' {
2042             let start = self.span_char().end;
2043             while self.bump_and_bump_space() && self.char() != '}' {
2044                 scratch.push(self.char());
2045             }
2046             if self.is_eof() {
2047                 return Err(self
2048                     .error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2049             }
2050             assert_eq!(self.char(), '}');
2051             self.bump();
2052 
2053             let name = scratch.as_str();
2054             if let Some(i) = name.find("!=") {
2055                 (
2056                     start,
2057                     ast::ClassUnicodeKind::NamedValue {
2058                         op: ast::ClassUnicodeOpKind::NotEqual,
2059                         name: name[..i].to_string(),
2060                         value: name[i + 2..].to_string(),
2061                     },
2062                 )
2063             } else if let Some(i) = name.find(':') {
2064                 (
2065                     start,
2066                     ast::ClassUnicodeKind::NamedValue {
2067                         op: ast::ClassUnicodeOpKind::Colon,
2068                         name: name[..i].to_string(),
2069                         value: name[i + 1..].to_string(),
2070                     },
2071                 )
2072             } else if let Some(i) = name.find('=') {
2073                 (
2074                     start,
2075                     ast::ClassUnicodeKind::NamedValue {
2076                         op: ast::ClassUnicodeOpKind::Equal,
2077                         name: name[..i].to_string(),
2078                         value: name[i + 1..].to_string(),
2079                     },
2080                 )
2081             } else {
2082                 (start, ast::ClassUnicodeKind::Named(name.to_string()))
2083             }
2084         } else {
2085             let start = self.pos();
2086             let c = self.char();
2087             if c == '\\' {
2088                 return Err(self.error(
2089                     self.span_char(),
2090                     ast::ErrorKind::UnicodeClassInvalid,
2091                 ));
2092             }
2093             self.bump_and_bump_space();
2094             let kind = ast::ClassUnicodeKind::OneLetter(c);
2095             (start, kind)
2096         };
2097         Ok(ast::ClassUnicode {
2098             span: Span::new(start, self.pos()),
2099             negated,
2100             kind,
2101         })
2102     }
2103 
2104     /// Parse a Perl character class, e.g., `\d` or `\W`. This assumes the
2105     /// parser is currently at a valid character class name and will be
2106     /// advanced to the character immediately following the class.
2107     #[inline(never)]
parse_perl_class(&self) -> ast::ClassPerl2108     fn parse_perl_class(&self) -> ast::ClassPerl {
2109         let c = self.char();
2110         let span = self.span_char();
2111         self.bump();
2112         let (negated, kind) = match c {
2113             'd' => (false, ast::ClassPerlKind::Digit),
2114             'D' => (true, ast::ClassPerlKind::Digit),
2115             's' => (false, ast::ClassPerlKind::Space),
2116             'S' => (true, ast::ClassPerlKind::Space),
2117             'w' => (false, ast::ClassPerlKind::Word),
2118             'W' => (true, ast::ClassPerlKind::Word),
2119             c => panic!("expected valid Perl class but got '{}'", c),
2120         };
2121         ast::ClassPerl { span, kind, negated }
2122     }
2123 }
2124 
2125 /// A type that traverses a fully parsed Ast and checks whether its depth
2126 /// exceeds the specified nesting limit. If it does, then an error is returned.
2127 #[derive(Debug)]
2128 struct NestLimiter<'p, 's, P> {
2129     /// The parser that is checking the nest limit.
2130     p: &'p ParserI<'s, P>,
2131     /// The current depth while walking an Ast.
2132     depth: u32,
2133 }
2134 
2135 impl<'p, 's, P: Borrow<Parser>> NestLimiter<'p, 's, P> {
new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P>2136     fn new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P> {
2137         NestLimiter { p, depth: 0 }
2138     }
2139 
2140     #[inline(never)]
check(self, ast: &Ast) -> Result<()>2141     fn check(self, ast: &Ast) -> Result<()> {
2142         ast::visit(ast, self)
2143     }
2144 
increment_depth(&mut self, span: &Span) -> Result<()>2145     fn increment_depth(&mut self, span: &Span) -> Result<()> {
2146         let new = self.depth.checked_add(1).ok_or_else(|| {
2147             self.p.error(
2148                 span.clone(),
2149                 ast::ErrorKind::NestLimitExceeded(::std::u32::MAX),
2150             )
2151         })?;
2152         let limit = self.p.parser().nest_limit;
2153         if new > limit {
2154             return Err(self.p.error(
2155                 span.clone(),
2156                 ast::ErrorKind::NestLimitExceeded(limit),
2157             ));
2158         }
2159         self.depth = new;
2160         Ok(())
2161     }
2162 
decrement_depth(&mut self)2163     fn decrement_depth(&mut self) {
2164         // Assuming the correctness of the visitor, this should never drop
2165         // below 0.
2166         self.depth = self.depth.checked_sub(1).unwrap();
2167     }
2168 }
2169 
2170 impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
2171     type Output = ();
2172     type Err = ast::Error;
2173 
finish(self) -> Result<()>2174     fn finish(self) -> Result<()> {
2175         Ok(())
2176     }
2177 
visit_pre(&mut self, ast: &Ast) -> Result<()>2178     fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
2179         let span = match *ast {
2180             Ast::Empty(_)
2181             | Ast::Flags(_)
2182             | Ast::Literal(_)
2183             | Ast::Dot(_)
2184             | Ast::Assertion(_)
2185             | Ast::Class(ast::Class::Unicode(_))
2186             | Ast::Class(ast::Class::Perl(_)) => {
2187                 // These are all base cases, so we don't increment depth.
2188                 return Ok(());
2189             }
2190             Ast::Class(ast::Class::Bracketed(ref x)) => &x.span,
2191             Ast::Repetition(ref x) => &x.span,
2192             Ast::Group(ref x) => &x.span,
2193             Ast::Alternation(ref x) => &x.span,
2194             Ast::Concat(ref x) => &x.span,
2195         };
2196         self.increment_depth(span)
2197     }
2198 
visit_post(&mut self, ast: &Ast) -> Result<()>2199     fn visit_post(&mut self, ast: &Ast) -> Result<()> {
2200         match *ast {
2201             Ast::Empty(_)
2202             | Ast::Flags(_)
2203             | Ast::Literal(_)
2204             | Ast::Dot(_)
2205             | Ast::Assertion(_)
2206             | Ast::Class(ast::Class::Unicode(_))
2207             | Ast::Class(ast::Class::Perl(_)) => {
2208                 // These are all base cases, so we don't decrement depth.
2209                 Ok(())
2210             }
2211             Ast::Class(ast::Class::Bracketed(_))
2212             | Ast::Repetition(_)
2213             | Ast::Group(_)
2214             | Ast::Alternation(_)
2215             | Ast::Concat(_) => {
2216                 self.decrement_depth();
2217                 Ok(())
2218             }
2219         }
2220     }
2221 
visit_class_set_item_pre( &mut self, ast: &ast::ClassSetItem, ) -> Result<()>2222     fn visit_class_set_item_pre(
2223         &mut self,
2224         ast: &ast::ClassSetItem,
2225     ) -> Result<()> {
2226         let span = match *ast {
2227             ast::ClassSetItem::Empty(_)
2228             | ast::ClassSetItem::Literal(_)
2229             | ast::ClassSetItem::Range(_)
2230             | ast::ClassSetItem::Ascii(_)
2231             | ast::ClassSetItem::Unicode(_)
2232             | ast::ClassSetItem::Perl(_) => {
2233                 // These are all base cases, so we don't increment depth.
2234                 return Ok(());
2235             }
2236             ast::ClassSetItem::Bracketed(ref x) => &x.span,
2237             ast::ClassSetItem::Union(ref x) => &x.span,
2238         };
2239         self.increment_depth(span)
2240     }
2241 
visit_class_set_item_post( &mut self, ast: &ast::ClassSetItem, ) -> Result<()>2242     fn visit_class_set_item_post(
2243         &mut self,
2244         ast: &ast::ClassSetItem,
2245     ) -> Result<()> {
2246         match *ast {
2247             ast::ClassSetItem::Empty(_)
2248             | ast::ClassSetItem::Literal(_)
2249             | ast::ClassSetItem::Range(_)
2250             | ast::ClassSetItem::Ascii(_)
2251             | ast::ClassSetItem::Unicode(_)
2252             | ast::ClassSetItem::Perl(_) => {
2253                 // These are all base cases, so we don't decrement depth.
2254                 Ok(())
2255             }
2256             ast::ClassSetItem::Bracketed(_) | ast::ClassSetItem::Union(_) => {
2257                 self.decrement_depth();
2258                 Ok(())
2259             }
2260         }
2261     }
2262 
visit_class_set_binary_op_pre( &mut self, ast: &ast::ClassSetBinaryOp, ) -> Result<()>2263     fn visit_class_set_binary_op_pre(
2264         &mut self,
2265         ast: &ast::ClassSetBinaryOp,
2266     ) -> Result<()> {
2267         self.increment_depth(&ast.span)
2268     }
2269 
visit_class_set_binary_op_post( &mut self, _ast: &ast::ClassSetBinaryOp, ) -> Result<()>2270     fn visit_class_set_binary_op_post(
2271         &mut self,
2272         _ast: &ast::ClassSetBinaryOp,
2273     ) -> Result<()> {
2274         self.decrement_depth();
2275         Ok(())
2276     }
2277 }
2278 
2279 /// When the result is an error, transforms the ast::ErrorKind from the source
2280 /// Result into another one. This function is used to return clearer error
2281 /// messages when possible.
specialize_err<T>( result: Result<T>, from: ast::ErrorKind, to: ast::ErrorKind, ) -> Result<T>2282 fn specialize_err<T>(
2283     result: Result<T>,
2284     from: ast::ErrorKind,
2285     to: ast::ErrorKind,
2286 ) -> Result<T> {
2287     if let Err(e) = result {
2288         if e.kind == from {
2289             Err(ast::Error { kind: to, pattern: e.pattern, span: e.span })
2290         } else {
2291             Err(e)
2292         }
2293     } else {
2294         result
2295     }
2296 }
2297 
2298 #[cfg(test)]
2299 mod tests {
2300     use std::ops::Range;
2301 
2302     use super::{Parser, ParserBuilder, ParserI, Primitive};
2303     use crate::ast::{self, Ast, Position, Span};
2304 
2305     // Our own assert_eq, which has slightly better formatting (but honestly
2306     // still kind of crappy).
2307     macro_rules! assert_eq {
2308         ($left:expr, $right:expr) => {{
2309             match (&$left, &$right) {
2310                 (left_val, right_val) => {
2311                     if !(*left_val == *right_val) {
2312                         panic!(
2313                             "assertion failed: `(left == right)`\n\n\
2314                              left:  `{:?}`\nright: `{:?}`\n\n",
2315                             left_val, right_val
2316                         )
2317                     }
2318                 }
2319             }
2320         }};
2321     }
2322 
2323     // We create these errors to compare with real ast::Errors in the tests.
2324     // We define equality between TestError and ast::Error to disregard the
2325     // pattern string in ast::Error, which is annoying to provide in tests.
2326     #[derive(Clone, Debug)]
2327     struct TestError {
2328         span: Span,
2329         kind: ast::ErrorKind,
2330     }
2331 
2332     impl PartialEq<ast::Error> for TestError {
eq(&self, other: &ast::Error) -> bool2333         fn eq(&self, other: &ast::Error) -> bool {
2334             self.span == other.span && self.kind == other.kind
2335         }
2336     }
2337 
2338     impl PartialEq<TestError> for ast::Error {
eq(&self, other: &TestError) -> bool2339         fn eq(&self, other: &TestError) -> bool {
2340             self.span == other.span && self.kind == other.kind
2341         }
2342     }
2343 
s(str: &str) -> String2344     fn s(str: &str) -> String {
2345         str.to_string()
2346     }
2347 
parser(pattern: &str) -> ParserI<'_, Parser>2348     fn parser(pattern: &str) -> ParserI<'_, Parser> {
2349         ParserI::new(Parser::new(), pattern)
2350     }
2351 
parser_octal(pattern: &str) -> ParserI<'_, Parser>2352     fn parser_octal(pattern: &str) -> ParserI<'_, Parser> {
2353         let parser = ParserBuilder::new().octal(true).build();
2354         ParserI::new(parser, pattern)
2355     }
2356 
parser_nest_limit( pattern: &str, nest_limit: u32, ) -> ParserI<'_, Parser>2357     fn parser_nest_limit(
2358         pattern: &str,
2359         nest_limit: u32,
2360     ) -> ParserI<'_, Parser> {
2361         let p = ParserBuilder::new().nest_limit(nest_limit).build();
2362         ParserI::new(p, pattern)
2363     }
2364 
parser_ignore_whitespace(pattern: &str) -> ParserI<'_, Parser>2365     fn parser_ignore_whitespace(pattern: &str) -> ParserI<'_, Parser> {
2366         let p = ParserBuilder::new().ignore_whitespace(true).build();
2367         ParserI::new(p, pattern)
2368     }
2369 
2370     /// Short alias for creating a new span.
nspan(start: Position, end: Position) -> Span2371     fn nspan(start: Position, end: Position) -> Span {
2372         Span::new(start, end)
2373     }
2374 
2375     /// Short alias for creating a new position.
npos(offset: usize, line: usize, column: usize) -> Position2376     fn npos(offset: usize, line: usize, column: usize) -> Position {
2377         Position::new(offset, line, column)
2378     }
2379 
2380     /// Create a new span from the given offset range. This assumes a single
2381     /// line and sets the columns based on the offsets. i.e., This only works
2382     /// out of the box for ASCII, which is fine for most tests.
span(range: Range<usize>) -> Span2383     fn span(range: Range<usize>) -> Span {
2384         let start = Position::new(range.start, 1, range.start + 1);
2385         let end = Position::new(range.end, 1, range.end + 1);
2386         Span::new(start, end)
2387     }
2388 
2389     /// Create a new span for the corresponding byte range in the given string.
span_range(subject: &str, range: Range<usize>) -> Span2390     fn span_range(subject: &str, range: Range<usize>) -> Span {
2391         let start = Position {
2392             offset: range.start,
2393             line: 1 + subject[..range.start].matches('\n').count(),
2394             column: 1 + subject[..range.start]
2395                 .chars()
2396                 .rev()
2397                 .position(|c| c == '\n')
2398                 .unwrap_or(subject[..range.start].chars().count()),
2399         };
2400         let end = Position {
2401             offset: range.end,
2402             line: 1 + subject[..range.end].matches('\n').count(),
2403             column: 1 + subject[..range.end]
2404                 .chars()
2405                 .rev()
2406                 .position(|c| c == '\n')
2407                 .unwrap_or(subject[..range.end].chars().count()),
2408         };
2409         Span::new(start, end)
2410     }
2411 
2412     /// Create a verbatim literal starting at the given position.
lit(c: char, start: usize) -> Ast2413     fn lit(c: char, start: usize) -> Ast {
2414         lit_with(c, span(start..start + c.len_utf8()))
2415     }
2416 
2417     /// Create a punctuation literal starting at the given position.
punct_lit(c: char, span: Span) -> Ast2418     fn punct_lit(c: char, span: Span) -> Ast {
2419         Ast::Literal(ast::Literal {
2420             span,
2421             kind: ast::LiteralKind::Punctuation,
2422             c,
2423         })
2424     }
2425 
2426     /// Create a verbatim literal with the given span.
lit_with(c: char, span: Span) -> Ast2427     fn lit_with(c: char, span: Span) -> Ast {
2428         Ast::Literal(ast::Literal {
2429             span,
2430             kind: ast::LiteralKind::Verbatim,
2431             c,
2432         })
2433     }
2434 
2435     /// Create a concatenation with the given range.
concat(range: Range<usize>, asts: Vec<Ast>) -> Ast2436     fn concat(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2437         concat_with(span(range), asts)
2438     }
2439 
2440     /// Create a concatenation with the given span.
concat_with(span: Span, asts: Vec<Ast>) -> Ast2441     fn concat_with(span: Span, asts: Vec<Ast>) -> Ast {
2442         Ast::Concat(ast::Concat { span, asts })
2443     }
2444 
2445     /// Create an alternation with the given span.
alt(range: Range<usize>, asts: Vec<Ast>) -> Ast2446     fn alt(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2447         Ast::Alternation(ast::Alternation { span: span(range), asts })
2448     }
2449 
2450     /// Create a capturing group with the given span.
group(range: Range<usize>, index: u32, ast: Ast) -> Ast2451     fn group(range: Range<usize>, index: u32, ast: Ast) -> Ast {
2452         Ast::Group(ast::Group {
2453             span: span(range),
2454             kind: ast::GroupKind::CaptureIndex(index),
2455             ast: Box::new(ast),
2456         })
2457     }
2458 
2459     /// Create an ast::SetFlags.
2460     ///
2461     /// The given pattern should be the full pattern string. The range given
2462     /// should correspond to the byte offsets where the flag set occurs.
2463     ///
2464     /// If negated is true, then the set is interpreted as beginning with a
2465     /// negation.
flag_set( pat: &str, range: Range<usize>, flag: ast::Flag, negated: bool, ) -> Ast2466     fn flag_set(
2467         pat: &str,
2468         range: Range<usize>,
2469         flag: ast::Flag,
2470         negated: bool,
2471     ) -> Ast {
2472         let mut items = vec![ast::FlagsItem {
2473             span: span_range(pat, (range.end - 2)..(range.end - 1)),
2474             kind: ast::FlagsItemKind::Flag(flag),
2475         }];
2476         if negated {
2477             items.insert(
2478                 0,
2479                 ast::FlagsItem {
2480                     span: span_range(pat, (range.start + 2)..(range.end - 2)),
2481                     kind: ast::FlagsItemKind::Negation,
2482                 },
2483             );
2484         }
2485         Ast::Flags(ast::SetFlags {
2486             span: span_range(pat, range.clone()),
2487             flags: ast::Flags {
2488                 span: span_range(pat, (range.start + 2)..(range.end - 1)),
2489                 items,
2490             },
2491         })
2492     }
2493 
2494     #[test]
parse_nest_limit()2495     fn parse_nest_limit() {
2496         // A nest limit of 0 still allows some types of regexes.
2497         assert_eq!(
2498             parser_nest_limit("", 0).parse(),
2499             Ok(Ast::Empty(span(0..0)))
2500         );
2501         assert_eq!(parser_nest_limit("a", 0).parse(), Ok(lit('a', 0)));
2502 
2503         // Test repetition operations, which require one level of nesting.
2504         assert_eq!(
2505             parser_nest_limit("a+", 0).parse().unwrap_err(),
2506             TestError {
2507                 span: span(0..2),
2508                 kind: ast::ErrorKind::NestLimitExceeded(0),
2509             }
2510         );
2511         assert_eq!(
2512             parser_nest_limit("a+", 1).parse(),
2513             Ok(Ast::Repetition(ast::Repetition {
2514                 span: span(0..2),
2515                 op: ast::RepetitionOp {
2516                     span: span(1..2),
2517                     kind: ast::RepetitionKind::OneOrMore,
2518                 },
2519                 greedy: true,
2520                 ast: Box::new(lit('a', 0)),
2521             }))
2522         );
2523         assert_eq!(
2524             parser_nest_limit("(a)+", 1).parse().unwrap_err(),
2525             TestError {
2526                 span: span(0..3),
2527                 kind: ast::ErrorKind::NestLimitExceeded(1),
2528             }
2529         );
2530         assert_eq!(
2531             parser_nest_limit("a+*", 1).parse().unwrap_err(),
2532             TestError {
2533                 span: span(0..2),
2534                 kind: ast::ErrorKind::NestLimitExceeded(1),
2535             }
2536         );
2537         assert_eq!(
2538             parser_nest_limit("a+*", 2).parse(),
2539             Ok(Ast::Repetition(ast::Repetition {
2540                 span: span(0..3),
2541                 op: ast::RepetitionOp {
2542                     span: span(2..3),
2543                     kind: ast::RepetitionKind::ZeroOrMore,
2544                 },
2545                 greedy: true,
2546                 ast: Box::new(Ast::Repetition(ast::Repetition {
2547                     span: span(0..2),
2548                     op: ast::RepetitionOp {
2549                         span: span(1..2),
2550                         kind: ast::RepetitionKind::OneOrMore,
2551                     },
2552                     greedy: true,
2553                     ast: Box::new(lit('a', 0)),
2554                 })),
2555             }))
2556         );
2557 
2558         // Test concatenations. A concatenation requires one level of nesting.
2559         assert_eq!(
2560             parser_nest_limit("ab", 0).parse().unwrap_err(),
2561             TestError {
2562                 span: span(0..2),
2563                 kind: ast::ErrorKind::NestLimitExceeded(0),
2564             }
2565         );
2566         assert_eq!(
2567             parser_nest_limit("ab", 1).parse(),
2568             Ok(concat(0..2, vec![lit('a', 0), lit('b', 1)]))
2569         );
2570         assert_eq!(
2571             parser_nest_limit("abc", 1).parse(),
2572             Ok(concat(0..3, vec![lit('a', 0), lit('b', 1), lit('c', 2)]))
2573         );
2574 
2575         // Test alternations. An alternation requires one level of nesting.
2576         assert_eq!(
2577             parser_nest_limit("a|b", 0).parse().unwrap_err(),
2578             TestError {
2579                 span: span(0..3),
2580                 kind: ast::ErrorKind::NestLimitExceeded(0),
2581             }
2582         );
2583         assert_eq!(
2584             parser_nest_limit("a|b", 1).parse(),
2585             Ok(alt(0..3, vec![lit('a', 0), lit('b', 2)]))
2586         );
2587         assert_eq!(
2588             parser_nest_limit("a|b|c", 1).parse(),
2589             Ok(alt(0..5, vec![lit('a', 0), lit('b', 2), lit('c', 4)]))
2590         );
2591 
2592         // Test character classes. Classes form their own mini-recursive
2593         // syntax!
2594         assert_eq!(
2595             parser_nest_limit("[a]", 0).parse().unwrap_err(),
2596             TestError {
2597                 span: span(0..3),
2598                 kind: ast::ErrorKind::NestLimitExceeded(0),
2599             }
2600         );
2601         assert_eq!(
2602             parser_nest_limit("[a]", 1).parse(),
2603             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
2604                 span: span(0..3),
2605                 negated: false,
2606                 kind: ast::ClassSet::Item(ast::ClassSetItem::Literal(
2607                     ast::Literal {
2608                         span: span(1..2),
2609                         kind: ast::LiteralKind::Verbatim,
2610                         c: 'a',
2611                     }
2612                 )),
2613             })))
2614         );
2615         assert_eq!(
2616             parser_nest_limit("[ab]", 1).parse().unwrap_err(),
2617             TestError {
2618                 span: span(1..3),
2619                 kind: ast::ErrorKind::NestLimitExceeded(1),
2620             }
2621         );
2622         assert_eq!(
2623             parser_nest_limit("[ab[cd]]", 2).parse().unwrap_err(),
2624             TestError {
2625                 span: span(3..7),
2626                 kind: ast::ErrorKind::NestLimitExceeded(2),
2627             }
2628         );
2629         assert_eq!(
2630             parser_nest_limit("[ab[cd]]", 3).parse().unwrap_err(),
2631             TestError {
2632                 span: span(4..6),
2633                 kind: ast::ErrorKind::NestLimitExceeded(3),
2634             }
2635         );
2636         assert_eq!(
2637             parser_nest_limit("[a--b]", 1).parse().unwrap_err(),
2638             TestError {
2639                 span: span(1..5),
2640                 kind: ast::ErrorKind::NestLimitExceeded(1),
2641             }
2642         );
2643         assert_eq!(
2644             parser_nest_limit("[a--bc]", 2).parse().unwrap_err(),
2645             TestError {
2646                 span: span(4..6),
2647                 kind: ast::ErrorKind::NestLimitExceeded(2),
2648             }
2649         );
2650     }
2651 
2652     #[test]
parse_comments()2653     fn parse_comments() {
2654         let pat = "(?x)
2655 # This is comment 1.
2656 foo # This is comment 2.
2657   # This is comment 3.
2658 bar
2659 # This is comment 4.";
2660         let astc = parser(pat).parse_with_comments().unwrap();
2661         assert_eq!(
2662             astc.ast,
2663             concat_with(
2664                 span_range(pat, 0..pat.len()),
2665                 vec![
2666                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2667                     lit_with('f', span_range(pat, 26..27)),
2668                     lit_with('o', span_range(pat, 27..28)),
2669                     lit_with('o', span_range(pat, 28..29)),
2670                     lit_with('b', span_range(pat, 74..75)),
2671                     lit_with('a', span_range(pat, 75..76)),
2672                     lit_with('r', span_range(pat, 76..77)),
2673                 ]
2674             )
2675         );
2676         assert_eq!(
2677             astc.comments,
2678             vec![
2679                 ast::Comment {
2680                     span: span_range(pat, 5..26),
2681                     comment: s(" This is comment 1."),
2682                 },
2683                 ast::Comment {
2684                     span: span_range(pat, 30..51),
2685                     comment: s(" This is comment 2."),
2686                 },
2687                 ast::Comment {
2688                     span: span_range(pat, 53..74),
2689                     comment: s(" This is comment 3."),
2690                 },
2691                 ast::Comment {
2692                     span: span_range(pat, 78..98),
2693                     comment: s(" This is comment 4."),
2694                 },
2695             ]
2696         );
2697     }
2698 
2699     #[test]
parse_holistic()2700     fn parse_holistic() {
2701         assert_eq!(parser("]").parse(), Ok(lit(']', 0)));
2702         assert_eq!(
2703             parser(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~").parse(),
2704             Ok(concat(
2705                 0..36,
2706                 vec![
2707                     punct_lit('\\', span(0..2)),
2708                     punct_lit('.', span(2..4)),
2709                     punct_lit('+', span(4..6)),
2710                     punct_lit('*', span(6..8)),
2711                     punct_lit('?', span(8..10)),
2712                     punct_lit('(', span(10..12)),
2713                     punct_lit(')', span(12..14)),
2714                     punct_lit('|', span(14..16)),
2715                     punct_lit('[', span(16..18)),
2716                     punct_lit(']', span(18..20)),
2717                     punct_lit('{', span(20..22)),
2718                     punct_lit('}', span(22..24)),
2719                     punct_lit('^', span(24..26)),
2720                     punct_lit('$', span(26..28)),
2721                     punct_lit('#', span(28..30)),
2722                     punct_lit('&', span(30..32)),
2723                     punct_lit('-', span(32..34)),
2724                     punct_lit('~', span(34..36)),
2725                 ]
2726             ))
2727         );
2728     }
2729 
2730     #[test]
parse_ignore_whitespace()2731     fn parse_ignore_whitespace() {
2732         // Test that basic whitespace insensitivity works.
2733         let pat = "(?x)a b";
2734         assert_eq!(
2735             parser(pat).parse(),
2736             Ok(concat_with(
2737                 nspan(npos(0, 1, 1), npos(7, 1, 8)),
2738                 vec![
2739                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2740                     lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
2741                     lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
2742                 ]
2743             ))
2744         );
2745 
2746         // Test that we can toggle whitespace insensitivity.
2747         let pat = "(?x)a b(?-x)a b";
2748         assert_eq!(
2749             parser(pat).parse(),
2750             Ok(concat_with(
2751                 nspan(npos(0, 1, 1), npos(15, 1, 16)),
2752                 vec![
2753                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2754                     lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
2755                     lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
2756                     flag_set(pat, 7..12, ast::Flag::IgnoreWhitespace, true),
2757                     lit_with('a', nspan(npos(12, 1, 13), npos(13, 1, 14))),
2758                     lit_with(' ', nspan(npos(13, 1, 14), npos(14, 1, 15))),
2759                     lit_with('b', nspan(npos(14, 1, 15), npos(15, 1, 16))),
2760                 ]
2761             ))
2762         );
2763 
2764         // Test that nesting whitespace insensitive flags works.
2765         let pat = "a (?x:a )a ";
2766         assert_eq!(
2767             parser(pat).parse(),
2768             Ok(concat_with(
2769                 span_range(pat, 0..11),
2770                 vec![
2771                     lit_with('a', span_range(pat, 0..1)),
2772                     lit_with(' ', span_range(pat, 1..2)),
2773                     Ast::Group(ast::Group {
2774                         span: span_range(pat, 2..9),
2775                         kind: ast::GroupKind::NonCapturing(ast::Flags {
2776                             span: span_range(pat, 4..5),
2777                             items: vec![ast::FlagsItem {
2778                                 span: span_range(pat, 4..5),
2779                                 kind: ast::FlagsItemKind::Flag(
2780                                     ast::Flag::IgnoreWhitespace
2781                                 ),
2782                             },],
2783                         }),
2784                         ast: Box::new(lit_with('a', span_range(pat, 6..7))),
2785                     }),
2786                     lit_with('a', span_range(pat, 9..10)),
2787                     lit_with(' ', span_range(pat, 10..11)),
2788                 ]
2789             ))
2790         );
2791 
2792         // Test that whitespace after an opening paren is insignificant.
2793         let pat = "(?x)( ?P<foo> a )";
2794         assert_eq!(
2795             parser(pat).parse(),
2796             Ok(concat_with(
2797                 span_range(pat, 0..pat.len()),
2798                 vec![
2799                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2800                     Ast::Group(ast::Group {
2801                         span: span_range(pat, 4..pat.len()),
2802                         kind: ast::GroupKind::CaptureName(ast::CaptureName {
2803                             span: span_range(pat, 9..12),
2804                             name: s("foo"),
2805                             index: 1,
2806                         }),
2807                         ast: Box::new(lit_with('a', span_range(pat, 14..15))),
2808                     }),
2809                 ]
2810             ))
2811         );
2812         let pat = "(?x)(  a )";
2813         assert_eq!(
2814             parser(pat).parse(),
2815             Ok(concat_with(
2816                 span_range(pat, 0..pat.len()),
2817                 vec![
2818                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2819                     Ast::Group(ast::Group {
2820                         span: span_range(pat, 4..pat.len()),
2821                         kind: ast::GroupKind::CaptureIndex(1),
2822                         ast: Box::new(lit_with('a', span_range(pat, 7..8))),
2823                     }),
2824                 ]
2825             ))
2826         );
2827         let pat = "(?x)(  ?:  a )";
2828         assert_eq!(
2829             parser(pat).parse(),
2830             Ok(concat_with(
2831                 span_range(pat, 0..pat.len()),
2832                 vec![
2833                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2834                     Ast::Group(ast::Group {
2835                         span: span_range(pat, 4..pat.len()),
2836                         kind: ast::GroupKind::NonCapturing(ast::Flags {
2837                             span: span_range(pat, 8..8),
2838                             items: vec![],
2839                         }),
2840                         ast: Box::new(lit_with('a', span_range(pat, 11..12))),
2841                     }),
2842                 ]
2843             ))
2844         );
2845         let pat = r"(?x)\x { 53 }";
2846         assert_eq!(
2847             parser(pat).parse(),
2848             Ok(concat_with(
2849                 span_range(pat, 0..pat.len()),
2850                 vec![
2851                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2852                     Ast::Literal(ast::Literal {
2853                         span: span(4..13),
2854                         kind: ast::LiteralKind::HexBrace(
2855                             ast::HexLiteralKind::X
2856                         ),
2857                         c: 'S',
2858                     }),
2859                 ]
2860             ))
2861         );
2862 
2863         // Test that whitespace after an escape is OK.
2864         let pat = r"(?x)\ ";
2865         assert_eq!(
2866             parser(pat).parse(),
2867             Ok(concat_with(
2868                 span_range(pat, 0..pat.len()),
2869                 vec![
2870                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2871                     Ast::Literal(ast::Literal {
2872                         span: span_range(pat, 4..6),
2873                         kind: ast::LiteralKind::Special(
2874                             ast::SpecialLiteralKind::Space
2875                         ),
2876                         c: ' ',
2877                     }),
2878                 ]
2879             ))
2880         );
2881         // ... but only when `x` mode is enabled.
2882         let pat = r"\ ";
2883         assert_eq!(
2884             parser(pat).parse().unwrap_err(),
2885             TestError {
2886                 span: span_range(pat, 0..2),
2887                 kind: ast::ErrorKind::EscapeUnrecognized,
2888             }
2889         );
2890     }
2891 
2892     #[test]
parse_newlines()2893     fn parse_newlines() {
2894         let pat = ".\n.";
2895         assert_eq!(
2896             parser(pat).parse(),
2897             Ok(concat_with(
2898                 span_range(pat, 0..3),
2899                 vec![
2900                     Ast::Dot(span_range(pat, 0..1)),
2901                     lit_with('\n', span_range(pat, 1..2)),
2902                     Ast::Dot(span_range(pat, 2..3)),
2903                 ]
2904             ))
2905         );
2906 
2907         let pat = "foobar\nbaz\nquux\n";
2908         assert_eq!(
2909             parser(pat).parse(),
2910             Ok(concat_with(
2911                 span_range(pat, 0..pat.len()),
2912                 vec![
2913                     lit_with('f', nspan(npos(0, 1, 1), npos(1, 1, 2))),
2914                     lit_with('o', nspan(npos(1, 1, 2), npos(2, 1, 3))),
2915                     lit_with('o', nspan(npos(2, 1, 3), npos(3, 1, 4))),
2916                     lit_with('b', nspan(npos(3, 1, 4), npos(4, 1, 5))),
2917                     lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
2918                     lit_with('r', nspan(npos(5, 1, 6), npos(6, 1, 7))),
2919                     lit_with('\n', nspan(npos(6, 1, 7), npos(7, 2, 1))),
2920                     lit_with('b', nspan(npos(7, 2, 1), npos(8, 2, 2))),
2921                     lit_with('a', nspan(npos(8, 2, 2), npos(9, 2, 3))),
2922                     lit_with('z', nspan(npos(9, 2, 3), npos(10, 2, 4))),
2923                     lit_with('\n', nspan(npos(10, 2, 4), npos(11, 3, 1))),
2924                     lit_with('q', nspan(npos(11, 3, 1), npos(12, 3, 2))),
2925                     lit_with('u', nspan(npos(12, 3, 2), npos(13, 3, 3))),
2926                     lit_with('u', nspan(npos(13, 3, 3), npos(14, 3, 4))),
2927                     lit_with('x', nspan(npos(14, 3, 4), npos(15, 3, 5))),
2928                     lit_with('\n', nspan(npos(15, 3, 5), npos(16, 4, 1))),
2929                 ]
2930             ))
2931         );
2932     }
2933 
2934     #[test]
parse_uncounted_repetition()2935     fn parse_uncounted_repetition() {
2936         assert_eq!(
2937             parser(r"a*").parse(),
2938             Ok(Ast::Repetition(ast::Repetition {
2939                 span: span(0..2),
2940                 op: ast::RepetitionOp {
2941                     span: span(1..2),
2942                     kind: ast::RepetitionKind::ZeroOrMore,
2943                 },
2944                 greedy: true,
2945                 ast: Box::new(lit('a', 0)),
2946             }))
2947         );
2948         assert_eq!(
2949             parser(r"a+").parse(),
2950             Ok(Ast::Repetition(ast::Repetition {
2951                 span: span(0..2),
2952                 op: ast::RepetitionOp {
2953                     span: span(1..2),
2954                     kind: ast::RepetitionKind::OneOrMore,
2955                 },
2956                 greedy: true,
2957                 ast: Box::new(lit('a', 0)),
2958             }))
2959         );
2960 
2961         assert_eq!(
2962             parser(r"a?").parse(),
2963             Ok(Ast::Repetition(ast::Repetition {
2964                 span: span(0..2),
2965                 op: ast::RepetitionOp {
2966                     span: span(1..2),
2967                     kind: ast::RepetitionKind::ZeroOrOne,
2968                 },
2969                 greedy: true,
2970                 ast: Box::new(lit('a', 0)),
2971             }))
2972         );
2973         assert_eq!(
2974             parser(r"a??").parse(),
2975             Ok(Ast::Repetition(ast::Repetition {
2976                 span: span(0..3),
2977                 op: ast::RepetitionOp {
2978                     span: span(1..3),
2979                     kind: ast::RepetitionKind::ZeroOrOne,
2980                 },
2981                 greedy: false,
2982                 ast: Box::new(lit('a', 0)),
2983             }))
2984         );
2985         assert_eq!(
2986             parser(r"a?").parse(),
2987             Ok(Ast::Repetition(ast::Repetition {
2988                 span: span(0..2),
2989                 op: ast::RepetitionOp {
2990                     span: span(1..2),
2991                     kind: ast::RepetitionKind::ZeroOrOne,
2992                 },
2993                 greedy: true,
2994                 ast: Box::new(lit('a', 0)),
2995             }))
2996         );
2997         assert_eq!(
2998             parser(r"a?b").parse(),
2999             Ok(concat(
3000                 0..3,
3001                 vec![
3002                     Ast::Repetition(ast::Repetition {
3003                         span: span(0..2),
3004                         op: ast::RepetitionOp {
3005                             span: span(1..2),
3006                             kind: ast::RepetitionKind::ZeroOrOne,
3007                         },
3008                         greedy: true,
3009                         ast: Box::new(lit('a', 0)),
3010                     }),
3011                     lit('b', 2),
3012                 ]
3013             ))
3014         );
3015         assert_eq!(
3016             parser(r"a??b").parse(),
3017             Ok(concat(
3018                 0..4,
3019                 vec![
3020                     Ast::Repetition(ast::Repetition {
3021                         span: span(0..3),
3022                         op: ast::RepetitionOp {
3023                             span: span(1..3),
3024                             kind: ast::RepetitionKind::ZeroOrOne,
3025                         },
3026                         greedy: false,
3027                         ast: Box::new(lit('a', 0)),
3028                     }),
3029                     lit('b', 3),
3030                 ]
3031             ))
3032         );
3033         assert_eq!(
3034             parser(r"ab?").parse(),
3035             Ok(concat(
3036                 0..3,
3037                 vec![
3038                     lit('a', 0),
3039                     Ast::Repetition(ast::Repetition {
3040                         span: span(1..3),
3041                         op: ast::RepetitionOp {
3042                             span: span(2..3),
3043                             kind: ast::RepetitionKind::ZeroOrOne,
3044                         },
3045                         greedy: true,
3046                         ast: Box::new(lit('b', 1)),
3047                     }),
3048                 ]
3049             ))
3050         );
3051         assert_eq!(
3052             parser(r"(ab)?").parse(),
3053             Ok(Ast::Repetition(ast::Repetition {
3054                 span: span(0..5),
3055                 op: ast::RepetitionOp {
3056                     span: span(4..5),
3057                     kind: ast::RepetitionKind::ZeroOrOne,
3058                 },
3059                 greedy: true,
3060                 ast: Box::new(group(
3061                     0..4,
3062                     1,
3063                     concat(1..3, vec![lit('a', 1), lit('b', 2),])
3064                 )),
3065             }))
3066         );
3067         assert_eq!(
3068             parser(r"|a?").parse(),
3069             Ok(alt(
3070                 0..3,
3071                 vec![
3072                     Ast::Empty(span(0..0)),
3073                     Ast::Repetition(ast::Repetition {
3074                         span: span(1..3),
3075                         op: ast::RepetitionOp {
3076                             span: span(2..3),
3077                             kind: ast::RepetitionKind::ZeroOrOne,
3078                         },
3079                         greedy: true,
3080                         ast: Box::new(lit('a', 1)),
3081                     }),
3082                 ]
3083             ))
3084         );
3085 
3086         assert_eq!(
3087             parser(r"*").parse().unwrap_err(),
3088             TestError {
3089                 span: span(0..0),
3090                 kind: ast::ErrorKind::RepetitionMissing,
3091             }
3092         );
3093         assert_eq!(
3094             parser(r"(?i)*").parse().unwrap_err(),
3095             TestError {
3096                 span: span(4..4),
3097                 kind: ast::ErrorKind::RepetitionMissing,
3098             }
3099         );
3100         assert_eq!(
3101             parser(r"(*)").parse().unwrap_err(),
3102             TestError {
3103                 span: span(1..1),
3104                 kind: ast::ErrorKind::RepetitionMissing,
3105             }
3106         );
3107         assert_eq!(
3108             parser(r"(?:?)").parse().unwrap_err(),
3109             TestError {
3110                 span: span(3..3),
3111                 kind: ast::ErrorKind::RepetitionMissing,
3112             }
3113         );
3114         assert_eq!(
3115             parser(r"+").parse().unwrap_err(),
3116             TestError {
3117                 span: span(0..0),
3118                 kind: ast::ErrorKind::RepetitionMissing,
3119             }
3120         );
3121         assert_eq!(
3122             parser(r"?").parse().unwrap_err(),
3123             TestError {
3124                 span: span(0..0),
3125                 kind: ast::ErrorKind::RepetitionMissing,
3126             }
3127         );
3128         assert_eq!(
3129             parser(r"(?)").parse().unwrap_err(),
3130             TestError {
3131                 span: span(1..1),
3132                 kind: ast::ErrorKind::RepetitionMissing,
3133             }
3134         );
3135         assert_eq!(
3136             parser(r"|*").parse().unwrap_err(),
3137             TestError {
3138                 span: span(1..1),
3139                 kind: ast::ErrorKind::RepetitionMissing,
3140             }
3141         );
3142         assert_eq!(
3143             parser(r"|+").parse().unwrap_err(),
3144             TestError {
3145                 span: span(1..1),
3146                 kind: ast::ErrorKind::RepetitionMissing,
3147             }
3148         );
3149         assert_eq!(
3150             parser(r"|?").parse().unwrap_err(),
3151             TestError {
3152                 span: span(1..1),
3153                 kind: ast::ErrorKind::RepetitionMissing,
3154             }
3155         );
3156     }
3157 
3158     #[test]
parse_counted_repetition()3159     fn parse_counted_repetition() {
3160         assert_eq!(
3161             parser(r"a{5}").parse(),
3162             Ok(Ast::Repetition(ast::Repetition {
3163                 span: span(0..4),
3164                 op: ast::RepetitionOp {
3165                     span: span(1..4),
3166                     kind: ast::RepetitionKind::Range(
3167                         ast::RepetitionRange::Exactly(5)
3168                     ),
3169                 },
3170                 greedy: true,
3171                 ast: Box::new(lit('a', 0)),
3172             }))
3173         );
3174         assert_eq!(
3175             parser(r"a{5,}").parse(),
3176             Ok(Ast::Repetition(ast::Repetition {
3177                 span: span(0..5),
3178                 op: ast::RepetitionOp {
3179                     span: span(1..5),
3180                     kind: ast::RepetitionKind::Range(
3181                         ast::RepetitionRange::AtLeast(5)
3182                     ),
3183                 },
3184                 greedy: true,
3185                 ast: Box::new(lit('a', 0)),
3186             }))
3187         );
3188         assert_eq!(
3189             parser(r"a{5,9}").parse(),
3190             Ok(Ast::Repetition(ast::Repetition {
3191                 span: span(0..6),
3192                 op: ast::RepetitionOp {
3193                     span: span(1..6),
3194                     kind: ast::RepetitionKind::Range(
3195                         ast::RepetitionRange::Bounded(5, 9)
3196                     ),
3197                 },
3198                 greedy: true,
3199                 ast: Box::new(lit('a', 0)),
3200             }))
3201         );
3202         assert_eq!(
3203             parser(r"a{5}?").parse(),
3204             Ok(Ast::Repetition(ast::Repetition {
3205                 span: span(0..5),
3206                 op: ast::RepetitionOp {
3207                     span: span(1..5),
3208                     kind: ast::RepetitionKind::Range(
3209                         ast::RepetitionRange::Exactly(5)
3210                     ),
3211                 },
3212                 greedy: false,
3213                 ast: Box::new(lit('a', 0)),
3214             }))
3215         );
3216         assert_eq!(
3217             parser(r"ab{5}").parse(),
3218             Ok(concat(
3219                 0..5,
3220                 vec![
3221                     lit('a', 0),
3222                     Ast::Repetition(ast::Repetition {
3223                         span: span(1..5),
3224                         op: ast::RepetitionOp {
3225                             span: span(2..5),
3226                             kind: ast::RepetitionKind::Range(
3227                                 ast::RepetitionRange::Exactly(5)
3228                             ),
3229                         },
3230                         greedy: true,
3231                         ast: Box::new(lit('b', 1)),
3232                     }),
3233                 ]
3234             ))
3235         );
3236         assert_eq!(
3237             parser(r"ab{5}c").parse(),
3238             Ok(concat(
3239                 0..6,
3240                 vec![
3241                     lit('a', 0),
3242                     Ast::Repetition(ast::Repetition {
3243                         span: span(1..5),
3244                         op: ast::RepetitionOp {
3245                             span: span(2..5),
3246                             kind: ast::RepetitionKind::Range(
3247                                 ast::RepetitionRange::Exactly(5)
3248                             ),
3249                         },
3250                         greedy: true,
3251                         ast: Box::new(lit('b', 1)),
3252                     }),
3253                     lit('c', 5),
3254                 ]
3255             ))
3256         );
3257 
3258         assert_eq!(
3259             parser(r"a{ 5 }").parse(),
3260             Ok(Ast::Repetition(ast::Repetition {
3261                 span: span(0..6),
3262                 op: ast::RepetitionOp {
3263                     span: span(1..6),
3264                     kind: ast::RepetitionKind::Range(
3265                         ast::RepetitionRange::Exactly(5)
3266                     ),
3267                 },
3268                 greedy: true,
3269                 ast: Box::new(lit('a', 0)),
3270             }))
3271         );
3272         assert_eq!(
3273             parser(r"a{ 5 , 9 }").parse(),
3274             Ok(Ast::Repetition(ast::Repetition {
3275                 span: span(0..10),
3276                 op: ast::RepetitionOp {
3277                     span: span(1..10),
3278                     kind: ast::RepetitionKind::Range(
3279                         ast::RepetitionRange::Bounded(5, 9)
3280                     ),
3281                 },
3282                 greedy: true,
3283                 ast: Box::new(lit('a', 0)),
3284             }))
3285         );
3286         assert_eq!(
3287             parser_ignore_whitespace(r"a{5,9} ?").parse(),
3288             Ok(Ast::Repetition(ast::Repetition {
3289                 span: span(0..8),
3290                 op: ast::RepetitionOp {
3291                     span: span(1..8),
3292                     kind: ast::RepetitionKind::Range(
3293                         ast::RepetitionRange::Bounded(5, 9)
3294                     ),
3295                 },
3296                 greedy: false,
3297                 ast: Box::new(lit('a', 0)),
3298             }))
3299         );
3300 
3301         assert_eq!(
3302             parser(r"(?i){0}").parse().unwrap_err(),
3303             TestError {
3304                 span: span(4..4),
3305                 kind: ast::ErrorKind::RepetitionMissing,
3306             }
3307         );
3308         assert_eq!(
3309             parser(r"(?m){1,1}").parse().unwrap_err(),
3310             TestError {
3311                 span: span(4..4),
3312                 kind: ast::ErrorKind::RepetitionMissing,
3313             }
3314         );
3315         assert_eq!(
3316             parser(r"a{]}").parse().unwrap_err(),
3317             TestError {
3318                 span: span(2..2),
3319                 kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3320             }
3321         );
3322         assert_eq!(
3323             parser(r"a{1,]}").parse().unwrap_err(),
3324             TestError {
3325                 span: span(4..4),
3326                 kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3327             }
3328         );
3329         assert_eq!(
3330             parser(r"a{").parse().unwrap_err(),
3331             TestError {
3332                 span: span(1..2),
3333                 kind: ast::ErrorKind::RepetitionCountUnclosed,
3334             }
3335         );
3336         assert_eq!(
3337             parser(r"a{}").parse().unwrap_err(),
3338             TestError {
3339                 span: span(2..2),
3340                 kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3341             }
3342         );
3343         assert_eq!(
3344             parser(r"a{a").parse().unwrap_err(),
3345             TestError {
3346                 span: span(2..2),
3347                 kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3348             }
3349         );
3350         assert_eq!(
3351             parser(r"a{9999999999}").parse().unwrap_err(),
3352             TestError {
3353                 span: span(2..12),
3354                 kind: ast::ErrorKind::DecimalInvalid,
3355             }
3356         );
3357         assert_eq!(
3358             parser(r"a{9").parse().unwrap_err(),
3359             TestError {
3360                 span: span(1..3),
3361                 kind: ast::ErrorKind::RepetitionCountUnclosed,
3362             }
3363         );
3364         assert_eq!(
3365             parser(r"a{9,a").parse().unwrap_err(),
3366             TestError {
3367                 span: span(4..4),
3368                 kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3369             }
3370         );
3371         assert_eq!(
3372             parser(r"a{9,9999999999}").parse().unwrap_err(),
3373             TestError {
3374                 span: span(4..14),
3375                 kind: ast::ErrorKind::DecimalInvalid,
3376             }
3377         );
3378         assert_eq!(
3379             parser(r"a{9,").parse().unwrap_err(),
3380             TestError {
3381                 span: span(1..4),
3382                 kind: ast::ErrorKind::RepetitionCountUnclosed,
3383             }
3384         );
3385         assert_eq!(
3386             parser(r"a{9,11").parse().unwrap_err(),
3387             TestError {
3388                 span: span(1..6),
3389                 kind: ast::ErrorKind::RepetitionCountUnclosed,
3390             }
3391         );
3392         assert_eq!(
3393             parser(r"a{2,1}").parse().unwrap_err(),
3394             TestError {
3395                 span: span(1..6),
3396                 kind: ast::ErrorKind::RepetitionCountInvalid,
3397             }
3398         );
3399         assert_eq!(
3400             parser(r"{5}").parse().unwrap_err(),
3401             TestError {
3402                 span: span(0..0),
3403                 kind: ast::ErrorKind::RepetitionMissing,
3404             }
3405         );
3406         assert_eq!(
3407             parser(r"|{5}").parse().unwrap_err(),
3408             TestError {
3409                 span: span(1..1),
3410                 kind: ast::ErrorKind::RepetitionMissing,
3411             }
3412         );
3413     }
3414 
3415     #[test]
parse_alternate()3416     fn parse_alternate() {
3417         assert_eq!(
3418             parser(r"a|b").parse(),
3419             Ok(Ast::Alternation(ast::Alternation {
3420                 span: span(0..3),
3421                 asts: vec![lit('a', 0), lit('b', 2)],
3422             }))
3423         );
3424         assert_eq!(
3425             parser(r"(a|b)").parse(),
3426             Ok(group(
3427                 0..5,
3428                 1,
3429                 Ast::Alternation(ast::Alternation {
3430                     span: span(1..4),
3431                     asts: vec![lit('a', 1), lit('b', 3)],
3432                 })
3433             ))
3434         );
3435 
3436         assert_eq!(
3437             parser(r"a|b|c").parse(),
3438             Ok(Ast::Alternation(ast::Alternation {
3439                 span: span(0..5),
3440                 asts: vec![lit('a', 0), lit('b', 2), lit('c', 4)],
3441             }))
3442         );
3443         assert_eq!(
3444             parser(r"ax|by|cz").parse(),
3445             Ok(Ast::Alternation(ast::Alternation {
3446                 span: span(0..8),
3447                 asts: vec![
3448                     concat(0..2, vec![lit('a', 0), lit('x', 1)]),
3449                     concat(3..5, vec![lit('b', 3), lit('y', 4)]),
3450                     concat(6..8, vec![lit('c', 6), lit('z', 7)]),
3451                 ],
3452             }))
3453         );
3454         assert_eq!(
3455             parser(r"(ax|by|cz)").parse(),
3456             Ok(group(
3457                 0..10,
3458                 1,
3459                 Ast::Alternation(ast::Alternation {
3460                     span: span(1..9),
3461                     asts: vec![
3462                         concat(1..3, vec![lit('a', 1), lit('x', 2)]),
3463                         concat(4..6, vec![lit('b', 4), lit('y', 5)]),
3464                         concat(7..9, vec![lit('c', 7), lit('z', 8)]),
3465                     ],
3466                 })
3467             ))
3468         );
3469         assert_eq!(
3470             parser(r"(ax|(by|(cz)))").parse(),
3471             Ok(group(
3472                 0..14,
3473                 1,
3474                 alt(
3475                     1..13,
3476                     vec![
3477                         concat(1..3, vec![lit('a', 1), lit('x', 2)]),
3478                         group(
3479                             4..13,
3480                             2,
3481                             alt(
3482                                 5..12,
3483                                 vec![
3484                                     concat(
3485                                         5..7,
3486                                         vec![lit('b', 5), lit('y', 6)]
3487                                     ),
3488                                     group(
3489                                         8..12,
3490                                         3,
3491                                         concat(
3492                                             9..11,
3493                                             vec![lit('c', 9), lit('z', 10),]
3494                                         )
3495                                     ),
3496                                 ]
3497                             )
3498                         ),
3499                     ]
3500                 )
3501             ))
3502         );
3503 
3504         assert_eq!(
3505             parser(r"|").parse(),
3506             Ok(alt(
3507                 0..1,
3508                 vec![Ast::Empty(span(0..0)), Ast::Empty(span(1..1)),]
3509             ))
3510         );
3511         assert_eq!(
3512             parser(r"||").parse(),
3513             Ok(alt(
3514                 0..2,
3515                 vec![
3516                     Ast::Empty(span(0..0)),
3517                     Ast::Empty(span(1..1)),
3518                     Ast::Empty(span(2..2)),
3519                 ]
3520             ))
3521         );
3522         assert_eq!(
3523             parser(r"a|").parse(),
3524             Ok(alt(0..2, vec![lit('a', 0), Ast::Empty(span(2..2)),]))
3525         );
3526         assert_eq!(
3527             parser(r"|a").parse(),
3528             Ok(alt(0..2, vec![Ast::Empty(span(0..0)), lit('a', 1),]))
3529         );
3530 
3531         assert_eq!(
3532             parser(r"(|)").parse(),
3533             Ok(group(
3534                 0..3,
3535                 1,
3536                 alt(
3537                     1..2,
3538                     vec![Ast::Empty(span(1..1)), Ast::Empty(span(2..2)),]
3539                 )
3540             ))
3541         );
3542         assert_eq!(
3543             parser(r"(a|)").parse(),
3544             Ok(group(
3545                 0..4,
3546                 1,
3547                 alt(1..3, vec![lit('a', 1), Ast::Empty(span(3..3)),])
3548             ))
3549         );
3550         assert_eq!(
3551             parser(r"(|a)").parse(),
3552             Ok(group(
3553                 0..4,
3554                 1,
3555                 alt(1..3, vec![Ast::Empty(span(1..1)), lit('a', 2),])
3556             ))
3557         );
3558 
3559         assert_eq!(
3560             parser(r"a|b)").parse().unwrap_err(),
3561             TestError {
3562                 span: span(3..4),
3563                 kind: ast::ErrorKind::GroupUnopened,
3564             }
3565         );
3566         assert_eq!(
3567             parser(r"(a|b").parse().unwrap_err(),
3568             TestError {
3569                 span: span(0..1),
3570                 kind: ast::ErrorKind::GroupUnclosed,
3571             }
3572         );
3573     }
3574 
3575     #[test]
parse_unsupported_lookaround()3576     fn parse_unsupported_lookaround() {
3577         assert_eq!(
3578             parser(r"(?=a)").parse().unwrap_err(),
3579             TestError {
3580                 span: span(0..3),
3581                 kind: ast::ErrorKind::UnsupportedLookAround,
3582             }
3583         );
3584         assert_eq!(
3585             parser(r"(?!a)").parse().unwrap_err(),
3586             TestError {
3587                 span: span(0..3),
3588                 kind: ast::ErrorKind::UnsupportedLookAround,
3589             }
3590         );
3591         assert_eq!(
3592             parser(r"(?<=a)").parse().unwrap_err(),
3593             TestError {
3594                 span: span(0..4),
3595                 kind: ast::ErrorKind::UnsupportedLookAround,
3596             }
3597         );
3598         assert_eq!(
3599             parser(r"(?<!a)").parse().unwrap_err(),
3600             TestError {
3601                 span: span(0..4),
3602                 kind: ast::ErrorKind::UnsupportedLookAround,
3603             }
3604         );
3605     }
3606 
3607     #[test]
parse_group()3608     fn parse_group() {
3609         assert_eq!(
3610             parser("(?i)").parse(),
3611             Ok(Ast::Flags(ast::SetFlags {
3612                 span: span(0..4),
3613                 flags: ast::Flags {
3614                     span: span(2..3),
3615                     items: vec![ast::FlagsItem {
3616                         span: span(2..3),
3617                         kind: ast::FlagsItemKind::Flag(
3618                             ast::Flag::CaseInsensitive
3619                         ),
3620                     }],
3621                 },
3622             }))
3623         );
3624         assert_eq!(
3625             parser("(?iU)").parse(),
3626             Ok(Ast::Flags(ast::SetFlags {
3627                 span: span(0..5),
3628                 flags: ast::Flags {
3629                     span: span(2..4),
3630                     items: vec![
3631                         ast::FlagsItem {
3632                             span: span(2..3),
3633                             kind: ast::FlagsItemKind::Flag(
3634                                 ast::Flag::CaseInsensitive
3635                             ),
3636                         },
3637                         ast::FlagsItem {
3638                             span: span(3..4),
3639                             kind: ast::FlagsItemKind::Flag(
3640                                 ast::Flag::SwapGreed
3641                             ),
3642                         },
3643                     ],
3644                 },
3645             }))
3646         );
3647         assert_eq!(
3648             parser("(?i-U)").parse(),
3649             Ok(Ast::Flags(ast::SetFlags {
3650                 span: span(0..6),
3651                 flags: ast::Flags {
3652                     span: span(2..5),
3653                     items: vec![
3654                         ast::FlagsItem {
3655                             span: span(2..3),
3656                             kind: ast::FlagsItemKind::Flag(
3657                                 ast::Flag::CaseInsensitive
3658                             ),
3659                         },
3660                         ast::FlagsItem {
3661                             span: span(3..4),
3662                             kind: ast::FlagsItemKind::Negation,
3663                         },
3664                         ast::FlagsItem {
3665                             span: span(4..5),
3666                             kind: ast::FlagsItemKind::Flag(
3667                                 ast::Flag::SwapGreed
3668                             ),
3669                         },
3670                     ],
3671                 },
3672             }))
3673         );
3674 
3675         assert_eq!(
3676             parser("()").parse(),
3677             Ok(Ast::Group(ast::Group {
3678                 span: span(0..2),
3679                 kind: ast::GroupKind::CaptureIndex(1),
3680                 ast: Box::new(Ast::Empty(span(1..1))),
3681             }))
3682         );
3683         assert_eq!(
3684             parser("(a)").parse(),
3685             Ok(Ast::Group(ast::Group {
3686                 span: span(0..3),
3687                 kind: ast::GroupKind::CaptureIndex(1),
3688                 ast: Box::new(lit('a', 1)),
3689             }))
3690         );
3691         assert_eq!(
3692             parser("(())").parse(),
3693             Ok(Ast::Group(ast::Group {
3694                 span: span(0..4),
3695                 kind: ast::GroupKind::CaptureIndex(1),
3696                 ast: Box::new(Ast::Group(ast::Group {
3697                     span: span(1..3),
3698                     kind: ast::GroupKind::CaptureIndex(2),
3699                     ast: Box::new(Ast::Empty(span(2..2))),
3700                 })),
3701             }))
3702         );
3703 
3704         assert_eq!(
3705             parser("(?:a)").parse(),
3706             Ok(Ast::Group(ast::Group {
3707                 span: span(0..5),
3708                 kind: ast::GroupKind::NonCapturing(ast::Flags {
3709                     span: span(2..2),
3710                     items: vec![],
3711                 }),
3712                 ast: Box::new(lit('a', 3)),
3713             }))
3714         );
3715 
3716         assert_eq!(
3717             parser("(?i:a)").parse(),
3718             Ok(Ast::Group(ast::Group {
3719                 span: span(0..6),
3720                 kind: ast::GroupKind::NonCapturing(ast::Flags {
3721                     span: span(2..3),
3722                     items: vec![ast::FlagsItem {
3723                         span: span(2..3),
3724                         kind: ast::FlagsItemKind::Flag(
3725                             ast::Flag::CaseInsensitive
3726                         ),
3727                     },],
3728                 }),
3729                 ast: Box::new(lit('a', 4)),
3730             }))
3731         );
3732         assert_eq!(
3733             parser("(?i-U:a)").parse(),
3734             Ok(Ast::Group(ast::Group {
3735                 span: span(0..8),
3736                 kind: ast::GroupKind::NonCapturing(ast::Flags {
3737                     span: span(2..5),
3738                     items: vec![
3739                         ast::FlagsItem {
3740                             span: span(2..3),
3741                             kind: ast::FlagsItemKind::Flag(
3742                                 ast::Flag::CaseInsensitive
3743                             ),
3744                         },
3745                         ast::FlagsItem {
3746                             span: span(3..4),
3747                             kind: ast::FlagsItemKind::Negation,
3748                         },
3749                         ast::FlagsItem {
3750                             span: span(4..5),
3751                             kind: ast::FlagsItemKind::Flag(
3752                                 ast::Flag::SwapGreed
3753                             ),
3754                         },
3755                     ],
3756                 }),
3757                 ast: Box::new(lit('a', 6)),
3758             }))
3759         );
3760 
3761         assert_eq!(
3762             parser("(").parse().unwrap_err(),
3763             TestError {
3764                 span: span(0..1),
3765                 kind: ast::ErrorKind::GroupUnclosed,
3766             }
3767         );
3768         assert_eq!(
3769             parser("(?").parse().unwrap_err(),
3770             TestError {
3771                 span: span(0..1),
3772                 kind: ast::ErrorKind::GroupUnclosed,
3773             }
3774         );
3775         assert_eq!(
3776             parser("(?P").parse().unwrap_err(),
3777             TestError {
3778                 span: span(2..3),
3779                 kind: ast::ErrorKind::FlagUnrecognized,
3780             }
3781         );
3782         assert_eq!(
3783             parser("(?P<").parse().unwrap_err(),
3784             TestError {
3785                 span: span(4..4),
3786                 kind: ast::ErrorKind::GroupNameUnexpectedEof,
3787             }
3788         );
3789         assert_eq!(
3790             parser("(a").parse().unwrap_err(),
3791             TestError {
3792                 span: span(0..1),
3793                 kind: ast::ErrorKind::GroupUnclosed,
3794             }
3795         );
3796         assert_eq!(
3797             parser("(()").parse().unwrap_err(),
3798             TestError {
3799                 span: span(0..1),
3800                 kind: ast::ErrorKind::GroupUnclosed,
3801             }
3802         );
3803         assert_eq!(
3804             parser(")").parse().unwrap_err(),
3805             TestError {
3806                 span: span(0..1),
3807                 kind: ast::ErrorKind::GroupUnopened,
3808             }
3809         );
3810         assert_eq!(
3811             parser("a)").parse().unwrap_err(),
3812             TestError {
3813                 span: span(1..2),
3814                 kind: ast::ErrorKind::GroupUnopened,
3815             }
3816         );
3817     }
3818 
3819     #[test]
parse_capture_name()3820     fn parse_capture_name() {
3821         assert_eq!(
3822             parser("(?P<a>z)").parse(),
3823             Ok(Ast::Group(ast::Group {
3824                 span: span(0..8),
3825                 kind: ast::GroupKind::CaptureName(ast::CaptureName {
3826                     span: span(4..5),
3827                     name: s("a"),
3828                     index: 1,
3829                 }),
3830                 ast: Box::new(lit('z', 6)),
3831             }))
3832         );
3833         assert_eq!(
3834             parser("(?P<abc>z)").parse(),
3835             Ok(Ast::Group(ast::Group {
3836                 span: span(0..10),
3837                 kind: ast::GroupKind::CaptureName(ast::CaptureName {
3838                     span: span(4..7),
3839                     name: s("abc"),
3840                     index: 1,
3841                 }),
3842                 ast: Box::new(lit('z', 8)),
3843             }))
3844         );
3845 
3846         assert_eq!(
3847             parser("(?P<a_1>z)").parse(),
3848             Ok(Ast::Group(ast::Group {
3849                 span: span(0..10),
3850                 kind: ast::GroupKind::CaptureName(ast::CaptureName {
3851                     span: span(4..7),
3852                     name: s("a_1"),
3853                     index: 1,
3854                 }),
3855                 ast: Box::new(lit('z', 8)),
3856             }))
3857         );
3858 
3859         assert_eq!(
3860             parser("(?P<a.1>z)").parse(),
3861             Ok(Ast::Group(ast::Group {
3862                 span: span(0..10),
3863                 kind: ast::GroupKind::CaptureName(ast::CaptureName {
3864                     span: span(4..7),
3865                     name: s("a.1"),
3866                     index: 1,
3867                 }),
3868                 ast: Box::new(lit('z', 8)),
3869             }))
3870         );
3871 
3872         assert_eq!(
3873             parser("(?P<a[1]>z)").parse(),
3874             Ok(Ast::Group(ast::Group {
3875                 span: span(0..11),
3876                 kind: ast::GroupKind::CaptureName(ast::CaptureName {
3877                     span: span(4..8),
3878                     name: s("a[1]"),
3879                     index: 1,
3880                 }),
3881                 ast: Box::new(lit('z', 9)),
3882             }))
3883         );
3884 
3885         assert_eq!(
3886             parser("(?P<").parse().unwrap_err(),
3887             TestError {
3888                 span: span(4..4),
3889                 kind: ast::ErrorKind::GroupNameUnexpectedEof,
3890             }
3891         );
3892         assert_eq!(
3893             parser("(?P<>z)").parse().unwrap_err(),
3894             TestError {
3895                 span: span(4..4),
3896                 kind: ast::ErrorKind::GroupNameEmpty,
3897             }
3898         );
3899         assert_eq!(
3900             parser("(?P<a").parse().unwrap_err(),
3901             TestError {
3902                 span: span(5..5),
3903                 kind: ast::ErrorKind::GroupNameUnexpectedEof,
3904             }
3905         );
3906         assert_eq!(
3907             parser("(?P<ab").parse().unwrap_err(),
3908             TestError {
3909                 span: span(6..6),
3910                 kind: ast::ErrorKind::GroupNameUnexpectedEof,
3911             }
3912         );
3913         assert_eq!(
3914             parser("(?P<0a").parse().unwrap_err(),
3915             TestError {
3916                 span: span(4..5),
3917                 kind: ast::ErrorKind::GroupNameInvalid,
3918             }
3919         );
3920         assert_eq!(
3921             parser("(?P<~").parse().unwrap_err(),
3922             TestError {
3923                 span: span(4..5),
3924                 kind: ast::ErrorKind::GroupNameInvalid,
3925             }
3926         );
3927         assert_eq!(
3928             parser("(?P<abc~").parse().unwrap_err(),
3929             TestError {
3930                 span: span(7..8),
3931                 kind: ast::ErrorKind::GroupNameInvalid,
3932             }
3933         );
3934         assert_eq!(
3935             parser("(?P<a>y)(?P<a>z)").parse().unwrap_err(),
3936             TestError {
3937                 span: span(12..13),
3938                 kind: ast::ErrorKind::GroupNameDuplicate {
3939                     original: span(4..5),
3940                 },
3941             }
3942         );
3943     }
3944 
3945     #[test]
parse_flags()3946     fn parse_flags() {
3947         assert_eq!(
3948             parser("i:").parse_flags(),
3949             Ok(ast::Flags {
3950                 span: span(0..1),
3951                 items: vec![ast::FlagsItem {
3952                     span: span(0..1),
3953                     kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
3954                 }],
3955             })
3956         );
3957         assert_eq!(
3958             parser("i)").parse_flags(),
3959             Ok(ast::Flags {
3960                 span: span(0..1),
3961                 items: vec![ast::FlagsItem {
3962                     span: span(0..1),
3963                     kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
3964                 }],
3965             })
3966         );
3967 
3968         assert_eq!(
3969             parser("isU:").parse_flags(),
3970             Ok(ast::Flags {
3971                 span: span(0..3),
3972                 items: vec![
3973                     ast::FlagsItem {
3974                         span: span(0..1),
3975                         kind: ast::FlagsItemKind::Flag(
3976                             ast::Flag::CaseInsensitive
3977                         ),
3978                     },
3979                     ast::FlagsItem {
3980                         span: span(1..2),
3981                         kind: ast::FlagsItemKind::Flag(
3982                             ast::Flag::DotMatchesNewLine
3983                         ),
3984                     },
3985                     ast::FlagsItem {
3986                         span: span(2..3),
3987                         kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
3988                     },
3989                 ],
3990             })
3991         );
3992 
3993         assert_eq!(
3994             parser("-isU:").parse_flags(),
3995             Ok(ast::Flags {
3996                 span: span(0..4),
3997                 items: vec![
3998                     ast::FlagsItem {
3999                         span: span(0..1),
4000                         kind: ast::FlagsItemKind::Negation,
4001                     },
4002                     ast::FlagsItem {
4003                         span: span(1..2),
4004                         kind: ast::FlagsItemKind::Flag(
4005                             ast::Flag::CaseInsensitive
4006                         ),
4007                     },
4008                     ast::FlagsItem {
4009                         span: span(2..3),
4010                         kind: ast::FlagsItemKind::Flag(
4011                             ast::Flag::DotMatchesNewLine
4012                         ),
4013                     },
4014                     ast::FlagsItem {
4015                         span: span(3..4),
4016                         kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
4017                     },
4018                 ],
4019             })
4020         );
4021         assert_eq!(
4022             parser("i-sU:").parse_flags(),
4023             Ok(ast::Flags {
4024                 span: span(0..4),
4025                 items: vec![
4026                     ast::FlagsItem {
4027                         span: span(0..1),
4028                         kind: ast::FlagsItemKind::Flag(
4029                             ast::Flag::CaseInsensitive
4030                         ),
4031                     },
4032                     ast::FlagsItem {
4033                         span: span(1..2),
4034                         kind: ast::FlagsItemKind::Negation,
4035                     },
4036                     ast::FlagsItem {
4037                         span: span(2..3),
4038                         kind: ast::FlagsItemKind::Flag(
4039                             ast::Flag::DotMatchesNewLine
4040                         ),
4041                     },
4042                     ast::FlagsItem {
4043                         span: span(3..4),
4044                         kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
4045                     },
4046                 ],
4047             })
4048         );
4049 
4050         assert_eq!(
4051             parser("isU").parse_flags().unwrap_err(),
4052             TestError {
4053                 span: span(3..3),
4054                 kind: ast::ErrorKind::FlagUnexpectedEof,
4055             }
4056         );
4057         assert_eq!(
4058             parser("isUa:").parse_flags().unwrap_err(),
4059             TestError {
4060                 span: span(3..4),
4061                 kind: ast::ErrorKind::FlagUnrecognized,
4062             }
4063         );
4064         assert_eq!(
4065             parser("isUi:").parse_flags().unwrap_err(),
4066             TestError {
4067                 span: span(3..4),
4068                 kind: ast::ErrorKind::FlagDuplicate { original: span(0..1) },
4069             }
4070         );
4071         assert_eq!(
4072             parser("i-sU-i:").parse_flags().unwrap_err(),
4073             TestError {
4074                 span: span(4..5),
4075                 kind: ast::ErrorKind::FlagRepeatedNegation {
4076                     original: span(1..2),
4077                 },
4078             }
4079         );
4080         assert_eq!(
4081             parser("-)").parse_flags().unwrap_err(),
4082             TestError {
4083                 span: span(0..1),
4084                 kind: ast::ErrorKind::FlagDanglingNegation,
4085             }
4086         );
4087         assert_eq!(
4088             parser("i-)").parse_flags().unwrap_err(),
4089             TestError {
4090                 span: span(1..2),
4091                 kind: ast::ErrorKind::FlagDanglingNegation,
4092             }
4093         );
4094         assert_eq!(
4095             parser("iU-)").parse_flags().unwrap_err(),
4096             TestError {
4097                 span: span(2..3),
4098                 kind: ast::ErrorKind::FlagDanglingNegation,
4099             }
4100         );
4101     }
4102 
4103     #[test]
parse_flag()4104     fn parse_flag() {
4105         assert_eq!(parser("i").parse_flag(), Ok(ast::Flag::CaseInsensitive));
4106         assert_eq!(parser("m").parse_flag(), Ok(ast::Flag::MultiLine));
4107         assert_eq!(parser("s").parse_flag(), Ok(ast::Flag::DotMatchesNewLine));
4108         assert_eq!(parser("U").parse_flag(), Ok(ast::Flag::SwapGreed));
4109         assert_eq!(parser("u").parse_flag(), Ok(ast::Flag::Unicode));
4110         assert_eq!(parser("x").parse_flag(), Ok(ast::Flag::IgnoreWhitespace));
4111 
4112         assert_eq!(
4113             parser("a").parse_flag().unwrap_err(),
4114             TestError {
4115                 span: span(0..1),
4116                 kind: ast::ErrorKind::FlagUnrecognized,
4117             }
4118         );
4119         assert_eq!(
4120             parser("☃").parse_flag().unwrap_err(),
4121             TestError {
4122                 span: span_range("☃", 0..3),
4123                 kind: ast::ErrorKind::FlagUnrecognized,
4124             }
4125         );
4126     }
4127 
4128     #[test]
parse_primitive_non_escape()4129     fn parse_primitive_non_escape() {
4130         assert_eq!(
4131             parser(r".").parse_primitive(),
4132             Ok(Primitive::Dot(span(0..1)))
4133         );
4134         assert_eq!(
4135             parser(r"^").parse_primitive(),
4136             Ok(Primitive::Assertion(ast::Assertion {
4137                 span: span(0..1),
4138                 kind: ast::AssertionKind::StartLine,
4139             }))
4140         );
4141         assert_eq!(
4142             parser(r"$").parse_primitive(),
4143             Ok(Primitive::Assertion(ast::Assertion {
4144                 span: span(0..1),
4145                 kind: ast::AssertionKind::EndLine,
4146             }))
4147         );
4148 
4149         assert_eq!(
4150             parser(r"a").parse_primitive(),
4151             Ok(Primitive::Literal(ast::Literal {
4152                 span: span(0..1),
4153                 kind: ast::LiteralKind::Verbatim,
4154                 c: 'a',
4155             }))
4156         );
4157         assert_eq!(
4158             parser(r"|").parse_primitive(),
4159             Ok(Primitive::Literal(ast::Literal {
4160                 span: span(0..1),
4161                 kind: ast::LiteralKind::Verbatim,
4162                 c: '|',
4163             }))
4164         );
4165         assert_eq!(
4166             parser(r"☃").parse_primitive(),
4167             Ok(Primitive::Literal(ast::Literal {
4168                 span: span_range("☃", 0..3),
4169                 kind: ast::LiteralKind::Verbatim,
4170                 c: '☃',
4171             }))
4172         );
4173     }
4174 
4175     #[test]
parse_escape()4176     fn parse_escape() {
4177         assert_eq!(
4178             parser(r"\|").parse_primitive(),
4179             Ok(Primitive::Literal(ast::Literal {
4180                 span: span(0..2),
4181                 kind: ast::LiteralKind::Punctuation,
4182                 c: '|',
4183             }))
4184         );
4185         let specials = &[
4186             (r"\a", '\x07', ast::SpecialLiteralKind::Bell),
4187             (r"\f", '\x0C', ast::SpecialLiteralKind::FormFeed),
4188             (r"\t", '\t', ast::SpecialLiteralKind::Tab),
4189             (r"\n", '\n', ast::SpecialLiteralKind::LineFeed),
4190             (r"\r", '\r', ast::SpecialLiteralKind::CarriageReturn),
4191             (r"\v", '\x0B', ast::SpecialLiteralKind::VerticalTab),
4192         ];
4193         for &(pat, c, ref kind) in specials {
4194             assert_eq!(
4195                 parser(pat).parse_primitive(),
4196                 Ok(Primitive::Literal(ast::Literal {
4197                     span: span(0..2),
4198                     kind: ast::LiteralKind::Special(kind.clone()),
4199                     c,
4200                 }))
4201             );
4202         }
4203         assert_eq!(
4204             parser(r"\A").parse_primitive(),
4205             Ok(Primitive::Assertion(ast::Assertion {
4206                 span: span(0..2),
4207                 kind: ast::AssertionKind::StartText,
4208             }))
4209         );
4210         assert_eq!(
4211             parser(r"\z").parse_primitive(),
4212             Ok(Primitive::Assertion(ast::Assertion {
4213                 span: span(0..2),
4214                 kind: ast::AssertionKind::EndText,
4215             }))
4216         );
4217         assert_eq!(
4218             parser(r"\b").parse_primitive(),
4219             Ok(Primitive::Assertion(ast::Assertion {
4220                 span: span(0..2),
4221                 kind: ast::AssertionKind::WordBoundary,
4222             }))
4223         );
4224         assert_eq!(
4225             parser(r"\B").parse_primitive(),
4226             Ok(Primitive::Assertion(ast::Assertion {
4227                 span: span(0..2),
4228                 kind: ast::AssertionKind::NotWordBoundary,
4229             }))
4230         );
4231 
4232         assert_eq!(
4233             parser(r"\").parse_escape().unwrap_err(),
4234             TestError {
4235                 span: span(0..1),
4236                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4237             }
4238         );
4239         assert_eq!(
4240             parser(r"\y").parse_escape().unwrap_err(),
4241             TestError {
4242                 span: span(0..2),
4243                 kind: ast::ErrorKind::EscapeUnrecognized,
4244             }
4245         );
4246     }
4247 
4248     #[test]
parse_unsupported_backreference()4249     fn parse_unsupported_backreference() {
4250         assert_eq!(
4251             parser(r"\0").parse_escape().unwrap_err(),
4252             TestError {
4253                 span: span(0..2),
4254                 kind: ast::ErrorKind::UnsupportedBackreference,
4255             }
4256         );
4257         assert_eq!(
4258             parser(r"\9").parse_escape().unwrap_err(),
4259             TestError {
4260                 span: span(0..2),
4261                 kind: ast::ErrorKind::UnsupportedBackreference,
4262             }
4263         );
4264     }
4265 
4266     #[test]
parse_octal()4267     fn parse_octal() {
4268         for i in 0..511 {
4269             let pat = format!(r"\{:o}", i);
4270             assert_eq!(
4271                 parser_octal(&pat).parse_escape(),
4272                 Ok(Primitive::Literal(ast::Literal {
4273                     span: span(0..pat.len()),
4274                     kind: ast::LiteralKind::Octal,
4275                     c: ::std::char::from_u32(i).unwrap(),
4276                 }))
4277             );
4278         }
4279         assert_eq!(
4280             parser_octal(r"\778").parse_escape(),
4281             Ok(Primitive::Literal(ast::Literal {
4282                 span: span(0..3),
4283                 kind: ast::LiteralKind::Octal,
4284                 c: '?',
4285             }))
4286         );
4287         assert_eq!(
4288             parser_octal(r"\7777").parse_escape(),
4289             Ok(Primitive::Literal(ast::Literal {
4290                 span: span(0..4),
4291                 kind: ast::LiteralKind::Octal,
4292                 c: '\u{01FF}',
4293             }))
4294         );
4295         assert_eq!(
4296             parser_octal(r"\778").parse(),
4297             Ok(Ast::Concat(ast::Concat {
4298                 span: span(0..4),
4299                 asts: vec![
4300                     Ast::Literal(ast::Literal {
4301                         span: span(0..3),
4302                         kind: ast::LiteralKind::Octal,
4303                         c: '?',
4304                     }),
4305                     Ast::Literal(ast::Literal {
4306                         span: span(3..4),
4307                         kind: ast::LiteralKind::Verbatim,
4308                         c: '8',
4309                     }),
4310                 ],
4311             }))
4312         );
4313         assert_eq!(
4314             parser_octal(r"\7777").parse(),
4315             Ok(Ast::Concat(ast::Concat {
4316                 span: span(0..5),
4317                 asts: vec![
4318                     Ast::Literal(ast::Literal {
4319                         span: span(0..4),
4320                         kind: ast::LiteralKind::Octal,
4321                         c: '\u{01FF}',
4322                     }),
4323                     Ast::Literal(ast::Literal {
4324                         span: span(4..5),
4325                         kind: ast::LiteralKind::Verbatim,
4326                         c: '7',
4327                     }),
4328                 ],
4329             }))
4330         );
4331 
4332         assert_eq!(
4333             parser_octal(r"\8").parse_escape().unwrap_err(),
4334             TestError {
4335                 span: span(0..2),
4336                 kind: ast::ErrorKind::EscapeUnrecognized,
4337             }
4338         );
4339     }
4340 
4341     #[test]
parse_hex_two()4342     fn parse_hex_two() {
4343         for i in 0..256 {
4344             let pat = format!(r"\x{:02x}", i);
4345             assert_eq!(
4346                 parser(&pat).parse_escape(),
4347                 Ok(Primitive::Literal(ast::Literal {
4348                     span: span(0..pat.len()),
4349                     kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X),
4350                     c: ::std::char::from_u32(i).unwrap(),
4351                 }))
4352             );
4353         }
4354 
4355         assert_eq!(
4356             parser(r"\xF").parse_escape().unwrap_err(),
4357             TestError {
4358                 span: span(3..3),
4359                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4360             }
4361         );
4362         assert_eq!(
4363             parser(r"\xG").parse_escape().unwrap_err(),
4364             TestError {
4365                 span: span(2..3),
4366                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4367             }
4368         );
4369         assert_eq!(
4370             parser(r"\xFG").parse_escape().unwrap_err(),
4371             TestError {
4372                 span: span(3..4),
4373                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4374             }
4375         );
4376     }
4377 
4378     #[test]
parse_hex_four()4379     fn parse_hex_four() {
4380         for i in 0..65536 {
4381             let c = match ::std::char::from_u32(i) {
4382                 None => continue,
4383                 Some(c) => c,
4384             };
4385             let pat = format!(r"\u{:04x}", i);
4386             assert_eq!(
4387                 parser(&pat).parse_escape(),
4388                 Ok(Primitive::Literal(ast::Literal {
4389                     span: span(0..pat.len()),
4390                     kind: ast::LiteralKind::HexFixed(
4391                         ast::HexLiteralKind::UnicodeShort
4392                     ),
4393                     c,
4394                 }))
4395             );
4396         }
4397 
4398         assert_eq!(
4399             parser(r"\uF").parse_escape().unwrap_err(),
4400             TestError {
4401                 span: span(3..3),
4402                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4403             }
4404         );
4405         assert_eq!(
4406             parser(r"\uG").parse_escape().unwrap_err(),
4407             TestError {
4408                 span: span(2..3),
4409                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4410             }
4411         );
4412         assert_eq!(
4413             parser(r"\uFG").parse_escape().unwrap_err(),
4414             TestError {
4415                 span: span(3..4),
4416                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4417             }
4418         );
4419         assert_eq!(
4420             parser(r"\uFFG").parse_escape().unwrap_err(),
4421             TestError {
4422                 span: span(4..5),
4423                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4424             }
4425         );
4426         assert_eq!(
4427             parser(r"\uFFFG").parse_escape().unwrap_err(),
4428             TestError {
4429                 span: span(5..6),
4430                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4431             }
4432         );
4433         assert_eq!(
4434             parser(r"\uD800").parse_escape().unwrap_err(),
4435             TestError {
4436                 span: span(2..6),
4437                 kind: ast::ErrorKind::EscapeHexInvalid,
4438             }
4439         );
4440     }
4441 
4442     #[test]
parse_hex_eight()4443     fn parse_hex_eight() {
4444         for i in 0..65536 {
4445             let c = match ::std::char::from_u32(i) {
4446                 None => continue,
4447                 Some(c) => c,
4448             };
4449             let pat = format!(r"\U{:08x}", i);
4450             assert_eq!(
4451                 parser(&pat).parse_escape(),
4452                 Ok(Primitive::Literal(ast::Literal {
4453                     span: span(0..pat.len()),
4454                     kind: ast::LiteralKind::HexFixed(
4455                         ast::HexLiteralKind::UnicodeLong
4456                     ),
4457                     c,
4458                 }))
4459             );
4460         }
4461 
4462         assert_eq!(
4463             parser(r"\UF").parse_escape().unwrap_err(),
4464             TestError {
4465                 span: span(3..3),
4466                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4467             }
4468         );
4469         assert_eq!(
4470             parser(r"\UG").parse_escape().unwrap_err(),
4471             TestError {
4472                 span: span(2..3),
4473                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4474             }
4475         );
4476         assert_eq!(
4477             parser(r"\UFG").parse_escape().unwrap_err(),
4478             TestError {
4479                 span: span(3..4),
4480                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4481             }
4482         );
4483         assert_eq!(
4484             parser(r"\UFFG").parse_escape().unwrap_err(),
4485             TestError {
4486                 span: span(4..5),
4487                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4488             }
4489         );
4490         assert_eq!(
4491             parser(r"\UFFFG").parse_escape().unwrap_err(),
4492             TestError {
4493                 span: span(5..6),
4494                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4495             }
4496         );
4497         assert_eq!(
4498             parser(r"\UFFFFG").parse_escape().unwrap_err(),
4499             TestError {
4500                 span: span(6..7),
4501                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4502             }
4503         );
4504         assert_eq!(
4505             parser(r"\UFFFFFG").parse_escape().unwrap_err(),
4506             TestError {
4507                 span: span(7..8),
4508                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4509             }
4510         );
4511         assert_eq!(
4512             parser(r"\UFFFFFFG").parse_escape().unwrap_err(),
4513             TestError {
4514                 span: span(8..9),
4515                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4516             }
4517         );
4518         assert_eq!(
4519             parser(r"\UFFFFFFFG").parse_escape().unwrap_err(),
4520             TestError {
4521                 span: span(9..10),
4522                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4523             }
4524         );
4525     }
4526 
4527     #[test]
parse_hex_brace()4528     fn parse_hex_brace() {
4529         assert_eq!(
4530             parser(r"\u{26c4}").parse_escape(),
4531             Ok(Primitive::Literal(ast::Literal {
4532                 span: span(0..8),
4533                 kind: ast::LiteralKind::HexBrace(
4534                     ast::HexLiteralKind::UnicodeShort
4535                 ),
4536                 c: '⛄',
4537             }))
4538         );
4539         assert_eq!(
4540             parser(r"\U{26c4}").parse_escape(),
4541             Ok(Primitive::Literal(ast::Literal {
4542                 span: span(0..8),
4543                 kind: ast::LiteralKind::HexBrace(
4544                     ast::HexLiteralKind::UnicodeLong
4545                 ),
4546                 c: '⛄',
4547             }))
4548         );
4549         assert_eq!(
4550             parser(r"\x{26c4}").parse_escape(),
4551             Ok(Primitive::Literal(ast::Literal {
4552                 span: span(0..8),
4553                 kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
4554                 c: '⛄',
4555             }))
4556         );
4557         assert_eq!(
4558             parser(r"\x{26C4}").parse_escape(),
4559             Ok(Primitive::Literal(ast::Literal {
4560                 span: span(0..8),
4561                 kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
4562                 c: '⛄',
4563             }))
4564         );
4565         assert_eq!(
4566             parser(r"\x{10fFfF}").parse_escape(),
4567             Ok(Primitive::Literal(ast::Literal {
4568                 span: span(0..10),
4569                 kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
4570                 c: '\u{10FFFF}',
4571             }))
4572         );
4573 
4574         assert_eq!(
4575             parser(r"\x").parse_escape().unwrap_err(),
4576             TestError {
4577                 span: span(2..2),
4578                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4579             }
4580         );
4581         assert_eq!(
4582             parser(r"\x{").parse_escape().unwrap_err(),
4583             TestError {
4584                 span: span(2..3),
4585                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4586             }
4587         );
4588         assert_eq!(
4589             parser(r"\x{FF").parse_escape().unwrap_err(),
4590             TestError {
4591                 span: span(2..5),
4592                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4593             }
4594         );
4595         assert_eq!(
4596             parser(r"\x{}").parse_escape().unwrap_err(),
4597             TestError {
4598                 span: span(2..4),
4599                 kind: ast::ErrorKind::EscapeHexEmpty,
4600             }
4601         );
4602         assert_eq!(
4603             parser(r"\x{FGF}").parse_escape().unwrap_err(),
4604             TestError {
4605                 span: span(4..5),
4606                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4607             }
4608         );
4609         assert_eq!(
4610             parser(r"\x{FFFFFF}").parse_escape().unwrap_err(),
4611             TestError {
4612                 span: span(3..9),
4613                 kind: ast::ErrorKind::EscapeHexInvalid,
4614             }
4615         );
4616         assert_eq!(
4617             parser(r"\x{D800}").parse_escape().unwrap_err(),
4618             TestError {
4619                 span: span(3..7),
4620                 kind: ast::ErrorKind::EscapeHexInvalid,
4621             }
4622         );
4623         assert_eq!(
4624             parser(r"\x{FFFFFFFFF}").parse_escape().unwrap_err(),
4625             TestError {
4626                 span: span(3..12),
4627                 kind: ast::ErrorKind::EscapeHexInvalid,
4628             }
4629         );
4630     }
4631 
4632     #[test]
parse_decimal()4633     fn parse_decimal() {
4634         assert_eq!(parser("123").parse_decimal(), Ok(123));
4635         assert_eq!(parser("0").parse_decimal(), Ok(0));
4636         assert_eq!(parser("01").parse_decimal(), Ok(1));
4637 
4638         assert_eq!(
4639             parser("-1").parse_decimal().unwrap_err(),
4640             TestError { span: span(0..0), kind: ast::ErrorKind::DecimalEmpty }
4641         );
4642         assert_eq!(
4643             parser("").parse_decimal().unwrap_err(),
4644             TestError { span: span(0..0), kind: ast::ErrorKind::DecimalEmpty }
4645         );
4646         assert_eq!(
4647             parser("9999999999").parse_decimal().unwrap_err(),
4648             TestError {
4649                 span: span(0..10),
4650                 kind: ast::ErrorKind::DecimalInvalid,
4651             }
4652         );
4653     }
4654 
4655     #[test]
parse_set_class()4656     fn parse_set_class() {
4657         fn union(span: Span, items: Vec<ast::ClassSetItem>) -> ast::ClassSet {
4658             ast::ClassSet::union(ast::ClassSetUnion { span, items })
4659         }
4660 
4661         fn intersection(
4662             span: Span,
4663             lhs: ast::ClassSet,
4664             rhs: ast::ClassSet,
4665         ) -> ast::ClassSet {
4666             ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4667                 span,
4668                 kind: ast::ClassSetBinaryOpKind::Intersection,
4669                 lhs: Box::new(lhs),
4670                 rhs: Box::new(rhs),
4671             })
4672         }
4673 
4674         fn difference(
4675             span: Span,
4676             lhs: ast::ClassSet,
4677             rhs: ast::ClassSet,
4678         ) -> ast::ClassSet {
4679             ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4680                 span,
4681                 kind: ast::ClassSetBinaryOpKind::Difference,
4682                 lhs: Box::new(lhs),
4683                 rhs: Box::new(rhs),
4684             })
4685         }
4686 
4687         fn symdifference(
4688             span: Span,
4689             lhs: ast::ClassSet,
4690             rhs: ast::ClassSet,
4691         ) -> ast::ClassSet {
4692             ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4693                 span,
4694                 kind: ast::ClassSetBinaryOpKind::SymmetricDifference,
4695                 lhs: Box::new(lhs),
4696                 rhs: Box::new(rhs),
4697             })
4698         }
4699 
4700         fn itemset(item: ast::ClassSetItem) -> ast::ClassSet {
4701             ast::ClassSet::Item(item)
4702         }
4703 
4704         fn item_ascii(cls: ast::ClassAscii) -> ast::ClassSetItem {
4705             ast::ClassSetItem::Ascii(cls)
4706         }
4707 
4708         fn item_unicode(cls: ast::ClassUnicode) -> ast::ClassSetItem {
4709             ast::ClassSetItem::Unicode(cls)
4710         }
4711 
4712         fn item_perl(cls: ast::ClassPerl) -> ast::ClassSetItem {
4713             ast::ClassSetItem::Perl(cls)
4714         }
4715 
4716         fn item_bracket(cls: ast::ClassBracketed) -> ast::ClassSetItem {
4717             ast::ClassSetItem::Bracketed(Box::new(cls))
4718         }
4719 
4720         fn lit(span: Span, c: char) -> ast::ClassSetItem {
4721             ast::ClassSetItem::Literal(ast::Literal {
4722                 span,
4723                 kind: ast::LiteralKind::Verbatim,
4724                 c,
4725             })
4726         }
4727 
4728         fn empty(span: Span) -> ast::ClassSetItem {
4729             ast::ClassSetItem::Empty(span)
4730         }
4731 
4732         fn range(span: Span, start: char, end: char) -> ast::ClassSetItem {
4733             let pos1 = Position {
4734                 offset: span.start.offset + start.len_utf8(),
4735                 column: span.start.column + 1,
4736                 ..span.start
4737             };
4738             let pos2 = Position {
4739                 offset: span.end.offset - end.len_utf8(),
4740                 column: span.end.column - 1,
4741                 ..span.end
4742             };
4743             ast::ClassSetItem::Range(ast::ClassSetRange {
4744                 span,
4745                 start: ast::Literal {
4746                     span: Span { end: pos1, ..span },
4747                     kind: ast::LiteralKind::Verbatim,
4748                     c: start,
4749                 },
4750                 end: ast::Literal {
4751                     span: Span { start: pos2, ..span },
4752                     kind: ast::LiteralKind::Verbatim,
4753                     c: end,
4754                 },
4755             })
4756         }
4757 
4758         fn alnum(span: Span, negated: bool) -> ast::ClassAscii {
4759             ast::ClassAscii { span, kind: ast::ClassAsciiKind::Alnum, negated }
4760         }
4761 
4762         fn lower(span: Span, negated: bool) -> ast::ClassAscii {
4763             ast::ClassAscii { span, kind: ast::ClassAsciiKind::Lower, negated }
4764         }
4765 
4766         assert_eq!(
4767             parser("[[:alnum:]]").parse(),
4768             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4769                 span: span(0..11),
4770                 negated: false,
4771                 kind: itemset(item_ascii(alnum(span(1..10), false))),
4772             })))
4773         );
4774         assert_eq!(
4775             parser("[[[:alnum:]]]").parse(),
4776             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4777                 span: span(0..13),
4778                 negated: false,
4779                 kind: itemset(item_bracket(ast::ClassBracketed {
4780                     span: span(1..12),
4781                     negated: false,
4782                     kind: itemset(item_ascii(alnum(span(2..11), false))),
4783                 })),
4784             })))
4785         );
4786         assert_eq!(
4787             parser("[[:alnum:]&&[:lower:]]").parse(),
4788             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4789                 span: span(0..22),
4790                 negated: false,
4791                 kind: intersection(
4792                     span(1..21),
4793                     itemset(item_ascii(alnum(span(1..10), false))),
4794                     itemset(item_ascii(lower(span(12..21), false))),
4795                 ),
4796             })))
4797         );
4798         assert_eq!(
4799             parser("[[:alnum:]--[:lower:]]").parse(),
4800             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4801                 span: span(0..22),
4802                 negated: false,
4803                 kind: difference(
4804                     span(1..21),
4805                     itemset(item_ascii(alnum(span(1..10), false))),
4806                     itemset(item_ascii(lower(span(12..21), false))),
4807                 ),
4808             })))
4809         );
4810         assert_eq!(
4811             parser("[[:alnum:]~~[:lower:]]").parse(),
4812             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4813                 span: span(0..22),
4814                 negated: false,
4815                 kind: symdifference(
4816                     span(1..21),
4817                     itemset(item_ascii(alnum(span(1..10), false))),
4818                     itemset(item_ascii(lower(span(12..21), false))),
4819                 ),
4820             })))
4821         );
4822 
4823         assert_eq!(
4824             parser("[a]").parse(),
4825             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4826                 span: span(0..3),
4827                 negated: false,
4828                 kind: itemset(lit(span(1..2), 'a')),
4829             })))
4830         );
4831         assert_eq!(
4832             parser(r"[a\]]").parse(),
4833             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4834                 span: span(0..5),
4835                 negated: false,
4836                 kind: union(
4837                     span(1..4),
4838                     vec![
4839                         lit(span(1..2), 'a'),
4840                         ast::ClassSetItem::Literal(ast::Literal {
4841                             span: span(2..4),
4842                             kind: ast::LiteralKind::Punctuation,
4843                             c: ']',
4844                         }),
4845                     ]
4846                 ),
4847             })))
4848         );
4849         assert_eq!(
4850             parser(r"[a\-z]").parse(),
4851             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4852                 span: span(0..6),
4853                 negated: false,
4854                 kind: union(
4855                     span(1..5),
4856                     vec![
4857                         lit(span(1..2), 'a'),
4858                         ast::ClassSetItem::Literal(ast::Literal {
4859                             span: span(2..4),
4860                             kind: ast::LiteralKind::Punctuation,
4861                             c: '-',
4862                         }),
4863                         lit(span(4..5), 'z'),
4864                     ]
4865                 ),
4866             })))
4867         );
4868         assert_eq!(
4869             parser("[ab]").parse(),
4870             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4871                 span: span(0..4),
4872                 negated: false,
4873                 kind: union(
4874                     span(1..3),
4875                     vec![lit(span(1..2), 'a'), lit(span(2..3), 'b'),]
4876                 ),
4877             })))
4878         );
4879         assert_eq!(
4880             parser("[a-]").parse(),
4881             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4882                 span: span(0..4),
4883                 negated: false,
4884                 kind: union(
4885                     span(1..3),
4886                     vec![lit(span(1..2), 'a'), lit(span(2..3), '-'),]
4887                 ),
4888             })))
4889         );
4890         assert_eq!(
4891             parser("[-a]").parse(),
4892             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4893                 span: span(0..4),
4894                 negated: false,
4895                 kind: union(
4896                     span(1..3),
4897                     vec![lit(span(1..2), '-'), lit(span(2..3), 'a'),]
4898                 ),
4899             })))
4900         );
4901         assert_eq!(
4902             parser(r"[\pL]").parse(),
4903             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4904                 span: span(0..5),
4905                 negated: false,
4906                 kind: itemset(item_unicode(ast::ClassUnicode {
4907                     span: span(1..4),
4908                     negated: false,
4909                     kind: ast::ClassUnicodeKind::OneLetter('L'),
4910                 })),
4911             })))
4912         );
4913         assert_eq!(
4914             parser(r"[\w]").parse(),
4915             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4916                 span: span(0..4),
4917                 negated: false,
4918                 kind: itemset(item_perl(ast::ClassPerl {
4919                     span: span(1..3),
4920                     kind: ast::ClassPerlKind::Word,
4921                     negated: false,
4922                 })),
4923             })))
4924         );
4925         assert_eq!(
4926             parser(r"[a\wz]").parse(),
4927             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4928                 span: span(0..6),
4929                 negated: false,
4930                 kind: union(
4931                     span(1..5),
4932                     vec![
4933                         lit(span(1..2), 'a'),
4934                         item_perl(ast::ClassPerl {
4935                             span: span(2..4),
4936                             kind: ast::ClassPerlKind::Word,
4937                             negated: false,
4938                         }),
4939                         lit(span(4..5), 'z'),
4940                     ]
4941                 ),
4942             })))
4943         );
4944 
4945         assert_eq!(
4946             parser("[a-z]").parse(),
4947             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4948                 span: span(0..5),
4949                 negated: false,
4950                 kind: itemset(range(span(1..4), 'a', 'z')),
4951             })))
4952         );
4953         assert_eq!(
4954             parser("[a-cx-z]").parse(),
4955             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4956                 span: span(0..8),
4957                 negated: false,
4958                 kind: union(
4959                     span(1..7),
4960                     vec![
4961                         range(span(1..4), 'a', 'c'),
4962                         range(span(4..7), 'x', 'z'),
4963                     ]
4964                 ),
4965             })))
4966         );
4967         assert_eq!(
4968             parser(r"[\w&&a-cx-z]").parse(),
4969             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4970                 span: span(0..12),
4971                 negated: false,
4972                 kind: intersection(
4973                     span(1..11),
4974                     itemset(item_perl(ast::ClassPerl {
4975                         span: span(1..3),
4976                         kind: ast::ClassPerlKind::Word,
4977                         negated: false,
4978                     })),
4979                     union(
4980                         span(5..11),
4981                         vec![
4982                             range(span(5..8), 'a', 'c'),
4983                             range(span(8..11), 'x', 'z'),
4984                         ]
4985                     ),
4986                 ),
4987             })))
4988         );
4989         assert_eq!(
4990             parser(r"[a-cx-z&&\w]").parse(),
4991             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4992                 span: span(0..12),
4993                 negated: false,
4994                 kind: intersection(
4995                     span(1..11),
4996                     union(
4997                         span(1..7),
4998                         vec![
4999                             range(span(1..4), 'a', 'c'),
5000                             range(span(4..7), 'x', 'z'),
5001                         ]
5002                     ),
5003                     itemset(item_perl(ast::ClassPerl {
5004                         span: span(9..11),
5005                         kind: ast::ClassPerlKind::Word,
5006                         negated: false,
5007                     })),
5008                 ),
5009             })))
5010         );
5011         assert_eq!(
5012             parser(r"[a--b--c]").parse(),
5013             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5014                 span: span(0..9),
5015                 negated: false,
5016                 kind: difference(
5017                     span(1..8),
5018                     difference(
5019                         span(1..5),
5020                         itemset(lit(span(1..2), 'a')),
5021                         itemset(lit(span(4..5), 'b')),
5022                     ),
5023                     itemset(lit(span(7..8), 'c')),
5024                 ),
5025             })))
5026         );
5027         assert_eq!(
5028             parser(r"[a~~b~~c]").parse(),
5029             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5030                 span: span(0..9),
5031                 negated: false,
5032                 kind: symdifference(
5033                     span(1..8),
5034                     symdifference(
5035                         span(1..5),
5036                         itemset(lit(span(1..2), 'a')),
5037                         itemset(lit(span(4..5), 'b')),
5038                     ),
5039                     itemset(lit(span(7..8), 'c')),
5040                 ),
5041             })))
5042         );
5043         assert_eq!(
5044             parser(r"[\^&&^]").parse(),
5045             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5046                 span: span(0..7),
5047                 negated: false,
5048                 kind: intersection(
5049                     span(1..6),
5050                     itemset(ast::ClassSetItem::Literal(ast::Literal {
5051                         span: span(1..3),
5052                         kind: ast::LiteralKind::Punctuation,
5053                         c: '^',
5054                     })),
5055                     itemset(lit(span(5..6), '^')),
5056                 ),
5057             })))
5058         );
5059         assert_eq!(
5060             parser(r"[\&&&&]").parse(),
5061             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5062                 span: span(0..7),
5063                 negated: false,
5064                 kind: intersection(
5065                     span(1..6),
5066                     itemset(ast::ClassSetItem::Literal(ast::Literal {
5067                         span: span(1..3),
5068                         kind: ast::LiteralKind::Punctuation,
5069                         c: '&',
5070                     })),
5071                     itemset(lit(span(5..6), '&')),
5072                 ),
5073             })))
5074         );
5075         assert_eq!(
5076             parser(r"[&&&&]").parse(),
5077             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5078                 span: span(0..6),
5079                 negated: false,
5080                 kind: intersection(
5081                     span(1..5),
5082                     intersection(
5083                         span(1..3),
5084                         itemset(empty(span(1..1))),
5085                         itemset(empty(span(3..3))),
5086                     ),
5087                     itemset(empty(span(5..5))),
5088                 ),
5089             })))
5090         );
5091 
5092         let pat = "[☃-⛄]";
5093         assert_eq!(
5094             parser(pat).parse(),
5095             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5096                 span: span_range(pat, 0..9),
5097                 negated: false,
5098                 kind: itemset(ast::ClassSetItem::Range(ast::ClassSetRange {
5099                     span: span_range(pat, 1..8),
5100                     start: ast::Literal {
5101                         span: span_range(pat, 1..4),
5102                         kind: ast::LiteralKind::Verbatim,
5103                         c: '☃',
5104                     },
5105                     end: ast::Literal {
5106                         span: span_range(pat, 5..8),
5107                         kind: ast::LiteralKind::Verbatim,
5108                         c: '⛄',
5109                     },
5110                 })),
5111             })))
5112         );
5113 
5114         assert_eq!(
5115             parser(r"[]]").parse(),
5116             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5117                 span: span(0..3),
5118                 negated: false,
5119                 kind: itemset(lit(span(1..2), ']')),
5120             })))
5121         );
5122         assert_eq!(
5123             parser(r"[]\[]").parse(),
5124             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5125                 span: span(0..5),
5126                 negated: false,
5127                 kind: union(
5128                     span(1..4),
5129                     vec![
5130                         lit(span(1..2), ']'),
5131                         ast::ClassSetItem::Literal(ast::Literal {
5132                             span: span(2..4),
5133                             kind: ast::LiteralKind::Punctuation,
5134                             c: '[',
5135                         }),
5136                     ]
5137                 ),
5138             })))
5139         );
5140         assert_eq!(
5141             parser(r"[\[]]").parse(),
5142             Ok(concat(
5143                 0..5,
5144                 vec![
5145                     Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5146                         span: span(0..4),
5147                         negated: false,
5148                         kind: itemset(ast::ClassSetItem::Literal(
5149                             ast::Literal {
5150                                 span: span(1..3),
5151                                 kind: ast::LiteralKind::Punctuation,
5152                                 c: '[',
5153                             }
5154                         )),
5155                     })),
5156                     Ast::Literal(ast::Literal {
5157                         span: span(4..5),
5158                         kind: ast::LiteralKind::Verbatim,
5159                         c: ']',
5160                     }),
5161                 ]
5162             ))
5163         );
5164 
5165         assert_eq!(
5166             parser("[").parse().unwrap_err(),
5167             TestError {
5168                 span: span(0..1),
5169                 kind: ast::ErrorKind::ClassUnclosed,
5170             }
5171         );
5172         assert_eq!(
5173             parser("[[").parse().unwrap_err(),
5174             TestError {
5175                 span: span(1..2),
5176                 kind: ast::ErrorKind::ClassUnclosed,
5177             }
5178         );
5179         assert_eq!(
5180             parser("[[-]").parse().unwrap_err(),
5181             TestError {
5182                 span: span(0..1),
5183                 kind: ast::ErrorKind::ClassUnclosed,
5184             }
5185         );
5186         assert_eq!(
5187             parser("[[[:alnum:]").parse().unwrap_err(),
5188             TestError {
5189                 span: span(1..2),
5190                 kind: ast::ErrorKind::ClassUnclosed,
5191             }
5192         );
5193         assert_eq!(
5194             parser(r"[\b]").parse().unwrap_err(),
5195             TestError {
5196                 span: span(1..3),
5197                 kind: ast::ErrorKind::ClassEscapeInvalid,
5198             }
5199         );
5200         assert_eq!(
5201             parser(r"[\w-a]").parse().unwrap_err(),
5202             TestError {
5203                 span: span(1..3),
5204                 kind: ast::ErrorKind::ClassRangeLiteral,
5205             }
5206         );
5207         assert_eq!(
5208             parser(r"[a-\w]").parse().unwrap_err(),
5209             TestError {
5210                 span: span(3..5),
5211                 kind: ast::ErrorKind::ClassRangeLiteral,
5212             }
5213         );
5214         assert_eq!(
5215             parser(r"[z-a]").parse().unwrap_err(),
5216             TestError {
5217                 span: span(1..4),
5218                 kind: ast::ErrorKind::ClassRangeInvalid,
5219             }
5220         );
5221 
5222         assert_eq!(
5223             parser_ignore_whitespace("[a ").parse().unwrap_err(),
5224             TestError {
5225                 span: span(0..1),
5226                 kind: ast::ErrorKind::ClassUnclosed,
5227             }
5228         );
5229         assert_eq!(
5230             parser_ignore_whitespace("[a- ").parse().unwrap_err(),
5231             TestError {
5232                 span: span(0..1),
5233                 kind: ast::ErrorKind::ClassUnclosed,
5234             }
5235         );
5236     }
5237 
5238     #[test]
parse_set_class_open()5239     fn parse_set_class_open() {
5240         assert_eq!(parser("[a]").parse_set_class_open(), {
5241             let set = ast::ClassBracketed {
5242                 span: span(0..1),
5243                 negated: false,
5244                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5245                     span: span(1..1),
5246                     items: vec![],
5247                 }),
5248             };
5249             let union = ast::ClassSetUnion { span: span(1..1), items: vec![] };
5250             Ok((set, union))
5251         });
5252         assert_eq!(
5253             parser_ignore_whitespace("[   a]").parse_set_class_open(),
5254             {
5255                 let set = ast::ClassBracketed {
5256                     span: span(0..4),
5257                     negated: false,
5258                     kind: ast::ClassSet::union(ast::ClassSetUnion {
5259                         span: span(4..4),
5260                         items: vec![],
5261                     }),
5262                 };
5263                 let union =
5264                     ast::ClassSetUnion { span: span(4..4), items: vec![] };
5265                 Ok((set, union))
5266             }
5267         );
5268         assert_eq!(parser("[^a]").parse_set_class_open(), {
5269             let set = ast::ClassBracketed {
5270                 span: span(0..2),
5271                 negated: true,
5272                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5273                     span: span(2..2),
5274                     items: vec![],
5275                 }),
5276             };
5277             let union = ast::ClassSetUnion { span: span(2..2), items: vec![] };
5278             Ok((set, union))
5279         });
5280         assert_eq!(
5281             parser_ignore_whitespace("[ ^ a]").parse_set_class_open(),
5282             {
5283                 let set = ast::ClassBracketed {
5284                     span: span(0..4),
5285                     negated: true,
5286                     kind: ast::ClassSet::union(ast::ClassSetUnion {
5287                         span: span(4..4),
5288                         items: vec![],
5289                     }),
5290                 };
5291                 let union =
5292                     ast::ClassSetUnion { span: span(4..4), items: vec![] };
5293                 Ok((set, union))
5294             }
5295         );
5296         assert_eq!(parser("[-a]").parse_set_class_open(), {
5297             let set = ast::ClassBracketed {
5298                 span: span(0..2),
5299                 negated: false,
5300                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5301                     span: span(1..1),
5302                     items: vec![],
5303                 }),
5304             };
5305             let union = ast::ClassSetUnion {
5306                 span: span(1..2),
5307                 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5308                     span: span(1..2),
5309                     kind: ast::LiteralKind::Verbatim,
5310                     c: '-',
5311                 })],
5312             };
5313             Ok((set, union))
5314         });
5315         assert_eq!(
5316             parser_ignore_whitespace("[ - a]").parse_set_class_open(),
5317             {
5318                 let set = ast::ClassBracketed {
5319                     span: span(0..4),
5320                     negated: false,
5321                     kind: ast::ClassSet::union(ast::ClassSetUnion {
5322                         span: span(2..2),
5323                         items: vec![],
5324                     }),
5325                 };
5326                 let union = ast::ClassSetUnion {
5327                     span: span(2..3),
5328                     items: vec![ast::ClassSetItem::Literal(ast::Literal {
5329                         span: span(2..3),
5330                         kind: ast::LiteralKind::Verbatim,
5331                         c: '-',
5332                     })],
5333                 };
5334                 Ok((set, union))
5335             }
5336         );
5337         assert_eq!(parser("[^-a]").parse_set_class_open(), {
5338             let set = ast::ClassBracketed {
5339                 span: span(0..3),
5340                 negated: true,
5341                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5342                     span: span(2..2),
5343                     items: vec![],
5344                 }),
5345             };
5346             let union = ast::ClassSetUnion {
5347                 span: span(2..3),
5348                 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5349                     span: span(2..3),
5350                     kind: ast::LiteralKind::Verbatim,
5351                     c: '-',
5352                 })],
5353             };
5354             Ok((set, union))
5355         });
5356         assert_eq!(parser("[--a]").parse_set_class_open(), {
5357             let set = ast::ClassBracketed {
5358                 span: span(0..3),
5359                 negated: false,
5360                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5361                     span: span(1..1),
5362                     items: vec![],
5363                 }),
5364             };
5365             let union = ast::ClassSetUnion {
5366                 span: span(1..3),
5367                 items: vec![
5368                     ast::ClassSetItem::Literal(ast::Literal {
5369                         span: span(1..2),
5370                         kind: ast::LiteralKind::Verbatim,
5371                         c: '-',
5372                     }),
5373                     ast::ClassSetItem::Literal(ast::Literal {
5374                         span: span(2..3),
5375                         kind: ast::LiteralKind::Verbatim,
5376                         c: '-',
5377                     }),
5378                 ],
5379             };
5380             Ok((set, union))
5381         });
5382         assert_eq!(parser("[]a]").parse_set_class_open(), {
5383             let set = ast::ClassBracketed {
5384                 span: span(0..2),
5385                 negated: false,
5386                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5387                     span: span(1..1),
5388                     items: vec![],
5389                 }),
5390             };
5391             let union = ast::ClassSetUnion {
5392                 span: span(1..2),
5393                 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5394                     span: span(1..2),
5395                     kind: ast::LiteralKind::Verbatim,
5396                     c: ']',
5397                 })],
5398             };
5399             Ok((set, union))
5400         });
5401         assert_eq!(
5402             parser_ignore_whitespace("[ ] a]").parse_set_class_open(),
5403             {
5404                 let set = ast::ClassBracketed {
5405                     span: span(0..4),
5406                     negated: false,
5407                     kind: ast::ClassSet::union(ast::ClassSetUnion {
5408                         span: span(2..2),
5409                         items: vec![],
5410                     }),
5411                 };
5412                 let union = ast::ClassSetUnion {
5413                     span: span(2..3),
5414                     items: vec![ast::ClassSetItem::Literal(ast::Literal {
5415                         span: span(2..3),
5416                         kind: ast::LiteralKind::Verbatim,
5417                         c: ']',
5418                     })],
5419                 };
5420                 Ok((set, union))
5421             }
5422         );
5423         assert_eq!(parser("[^]a]").parse_set_class_open(), {
5424             let set = ast::ClassBracketed {
5425                 span: span(0..3),
5426                 negated: true,
5427                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5428                     span: span(2..2),
5429                     items: vec![],
5430                 }),
5431             };
5432             let union = ast::ClassSetUnion {
5433                 span: span(2..3),
5434                 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5435                     span: span(2..3),
5436                     kind: ast::LiteralKind::Verbatim,
5437                     c: ']',
5438                 })],
5439             };
5440             Ok((set, union))
5441         });
5442         assert_eq!(parser("[-]a]").parse_set_class_open(), {
5443             let set = ast::ClassBracketed {
5444                 span: span(0..2),
5445                 negated: false,
5446                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5447                     span: span(1..1),
5448                     items: vec![],
5449                 }),
5450             };
5451             let union = ast::ClassSetUnion {
5452                 span: span(1..2),
5453                 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5454                     span: span(1..2),
5455                     kind: ast::LiteralKind::Verbatim,
5456                     c: '-',
5457                 })],
5458             };
5459             Ok((set, union))
5460         });
5461 
5462         assert_eq!(
5463             parser("[").parse_set_class_open().unwrap_err(),
5464             TestError {
5465                 span: span(0..1),
5466                 kind: ast::ErrorKind::ClassUnclosed,
5467             }
5468         );
5469         assert_eq!(
5470             parser_ignore_whitespace("[    ")
5471                 .parse_set_class_open()
5472                 .unwrap_err(),
5473             TestError {
5474                 span: span(0..5),
5475                 kind: ast::ErrorKind::ClassUnclosed,
5476             }
5477         );
5478         assert_eq!(
5479             parser("[^").parse_set_class_open().unwrap_err(),
5480             TestError {
5481                 span: span(0..2),
5482                 kind: ast::ErrorKind::ClassUnclosed,
5483             }
5484         );
5485         assert_eq!(
5486             parser("[]").parse_set_class_open().unwrap_err(),
5487             TestError {
5488                 span: span(0..2),
5489                 kind: ast::ErrorKind::ClassUnclosed,
5490             }
5491         );
5492         assert_eq!(
5493             parser("[-").parse_set_class_open().unwrap_err(),
5494             TestError {
5495                 span: span(0..0),
5496                 kind: ast::ErrorKind::ClassUnclosed,
5497             }
5498         );
5499         assert_eq!(
5500             parser("[--").parse_set_class_open().unwrap_err(),
5501             TestError {
5502                 span: span(0..0),
5503                 kind: ast::ErrorKind::ClassUnclosed,
5504             }
5505         );
5506 
5507         // See: https://github.com/rust-lang/regex/issues/792
5508         assert_eq!(
5509             parser("(?x)[-#]").parse_with_comments().unwrap_err(),
5510             TestError {
5511                 span: span(4..4),
5512                 kind: ast::ErrorKind::ClassUnclosed,
5513             }
5514         );
5515     }
5516 
5517     #[test]
maybe_parse_ascii_class()5518     fn maybe_parse_ascii_class() {
5519         assert_eq!(
5520             parser(r"[:alnum:]").maybe_parse_ascii_class(),
5521             Some(ast::ClassAscii {
5522                 span: span(0..9),
5523                 kind: ast::ClassAsciiKind::Alnum,
5524                 negated: false,
5525             })
5526         );
5527         assert_eq!(
5528             parser(r"[:alnum:]A").maybe_parse_ascii_class(),
5529             Some(ast::ClassAscii {
5530                 span: span(0..9),
5531                 kind: ast::ClassAsciiKind::Alnum,
5532                 negated: false,
5533             })
5534         );
5535         assert_eq!(
5536             parser(r"[:^alnum:]").maybe_parse_ascii_class(),
5537             Some(ast::ClassAscii {
5538                 span: span(0..10),
5539                 kind: ast::ClassAsciiKind::Alnum,
5540                 negated: true,
5541             })
5542         );
5543 
5544         let p = parser(r"[:");
5545         assert_eq!(p.maybe_parse_ascii_class(), None);
5546         assert_eq!(p.offset(), 0);
5547 
5548         let p = parser(r"[:^");
5549         assert_eq!(p.maybe_parse_ascii_class(), None);
5550         assert_eq!(p.offset(), 0);
5551 
5552         let p = parser(r"[^:alnum:]");
5553         assert_eq!(p.maybe_parse_ascii_class(), None);
5554         assert_eq!(p.offset(), 0);
5555 
5556         let p = parser(r"[:alnnum:]");
5557         assert_eq!(p.maybe_parse_ascii_class(), None);
5558         assert_eq!(p.offset(), 0);
5559 
5560         let p = parser(r"[:alnum]");
5561         assert_eq!(p.maybe_parse_ascii_class(), None);
5562         assert_eq!(p.offset(), 0);
5563 
5564         let p = parser(r"[:alnum:");
5565         assert_eq!(p.maybe_parse_ascii_class(), None);
5566         assert_eq!(p.offset(), 0);
5567     }
5568 
5569     #[test]
parse_unicode_class()5570     fn parse_unicode_class() {
5571         assert_eq!(
5572             parser(r"\pN").parse_escape(),
5573             Ok(Primitive::Unicode(ast::ClassUnicode {
5574                 span: span(0..3),
5575                 negated: false,
5576                 kind: ast::ClassUnicodeKind::OneLetter('N'),
5577             }))
5578         );
5579         assert_eq!(
5580             parser(r"\PN").parse_escape(),
5581             Ok(Primitive::Unicode(ast::ClassUnicode {
5582                 span: span(0..3),
5583                 negated: true,
5584                 kind: ast::ClassUnicodeKind::OneLetter('N'),
5585             }))
5586         );
5587         assert_eq!(
5588             parser(r"\p{N}").parse_escape(),
5589             Ok(Primitive::Unicode(ast::ClassUnicode {
5590                 span: span(0..5),
5591                 negated: false,
5592                 kind: ast::ClassUnicodeKind::Named(s("N")),
5593             }))
5594         );
5595         assert_eq!(
5596             parser(r"\P{N}").parse_escape(),
5597             Ok(Primitive::Unicode(ast::ClassUnicode {
5598                 span: span(0..5),
5599                 negated: true,
5600                 kind: ast::ClassUnicodeKind::Named(s("N")),
5601             }))
5602         );
5603         assert_eq!(
5604             parser(r"\p{Greek}").parse_escape(),
5605             Ok(Primitive::Unicode(ast::ClassUnicode {
5606                 span: span(0..9),
5607                 negated: false,
5608                 kind: ast::ClassUnicodeKind::Named(s("Greek")),
5609             }))
5610         );
5611 
5612         assert_eq!(
5613             parser(r"\p{scx:Katakana}").parse_escape(),
5614             Ok(Primitive::Unicode(ast::ClassUnicode {
5615                 span: span(0..16),
5616                 negated: false,
5617                 kind: ast::ClassUnicodeKind::NamedValue {
5618                     op: ast::ClassUnicodeOpKind::Colon,
5619                     name: s("scx"),
5620                     value: s("Katakana"),
5621                 },
5622             }))
5623         );
5624         assert_eq!(
5625             parser(r"\p{scx=Katakana}").parse_escape(),
5626             Ok(Primitive::Unicode(ast::ClassUnicode {
5627                 span: span(0..16),
5628                 negated: false,
5629                 kind: ast::ClassUnicodeKind::NamedValue {
5630                     op: ast::ClassUnicodeOpKind::Equal,
5631                     name: s("scx"),
5632                     value: s("Katakana"),
5633                 },
5634             }))
5635         );
5636         assert_eq!(
5637             parser(r"\p{scx!=Katakana}").parse_escape(),
5638             Ok(Primitive::Unicode(ast::ClassUnicode {
5639                 span: span(0..17),
5640                 negated: false,
5641                 kind: ast::ClassUnicodeKind::NamedValue {
5642                     op: ast::ClassUnicodeOpKind::NotEqual,
5643                     name: s("scx"),
5644                     value: s("Katakana"),
5645                 },
5646             }))
5647         );
5648 
5649         assert_eq!(
5650             parser(r"\p{:}").parse_escape(),
5651             Ok(Primitive::Unicode(ast::ClassUnicode {
5652                 span: span(0..5),
5653                 negated: false,
5654                 kind: ast::ClassUnicodeKind::NamedValue {
5655                     op: ast::ClassUnicodeOpKind::Colon,
5656                     name: s(""),
5657                     value: s(""),
5658                 },
5659             }))
5660         );
5661         assert_eq!(
5662             parser(r"\p{=}").parse_escape(),
5663             Ok(Primitive::Unicode(ast::ClassUnicode {
5664                 span: span(0..5),
5665                 negated: false,
5666                 kind: ast::ClassUnicodeKind::NamedValue {
5667                     op: ast::ClassUnicodeOpKind::Equal,
5668                     name: s(""),
5669                     value: s(""),
5670                 },
5671             }))
5672         );
5673         assert_eq!(
5674             parser(r"\p{!=}").parse_escape(),
5675             Ok(Primitive::Unicode(ast::ClassUnicode {
5676                 span: span(0..6),
5677                 negated: false,
5678                 kind: ast::ClassUnicodeKind::NamedValue {
5679                     op: ast::ClassUnicodeOpKind::NotEqual,
5680                     name: s(""),
5681                     value: s(""),
5682                 },
5683             }))
5684         );
5685 
5686         assert_eq!(
5687             parser(r"\p").parse_escape().unwrap_err(),
5688             TestError {
5689                 span: span(2..2),
5690                 kind: ast::ErrorKind::EscapeUnexpectedEof,
5691             }
5692         );
5693         assert_eq!(
5694             parser(r"\p{").parse_escape().unwrap_err(),
5695             TestError {
5696                 span: span(3..3),
5697                 kind: ast::ErrorKind::EscapeUnexpectedEof,
5698             }
5699         );
5700         assert_eq!(
5701             parser(r"\p{N").parse_escape().unwrap_err(),
5702             TestError {
5703                 span: span(4..4),
5704                 kind: ast::ErrorKind::EscapeUnexpectedEof,
5705             }
5706         );
5707         assert_eq!(
5708             parser(r"\p{Greek").parse_escape().unwrap_err(),
5709             TestError {
5710                 span: span(8..8),
5711                 kind: ast::ErrorKind::EscapeUnexpectedEof,
5712             }
5713         );
5714 
5715         assert_eq!(
5716             parser(r"\pNz").parse(),
5717             Ok(Ast::Concat(ast::Concat {
5718                 span: span(0..4),
5719                 asts: vec![
5720                     Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
5721                         span: span(0..3),
5722                         negated: false,
5723                         kind: ast::ClassUnicodeKind::OneLetter('N'),
5724                     })),
5725                     Ast::Literal(ast::Literal {
5726                         span: span(3..4),
5727                         kind: ast::LiteralKind::Verbatim,
5728                         c: 'z',
5729                     }),
5730                 ],
5731             }))
5732         );
5733         assert_eq!(
5734             parser(r"\p{Greek}z").parse(),
5735             Ok(Ast::Concat(ast::Concat {
5736                 span: span(0..10),
5737                 asts: vec![
5738                     Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
5739                         span: span(0..9),
5740                         negated: false,
5741                         kind: ast::ClassUnicodeKind::Named(s("Greek")),
5742                     })),
5743                     Ast::Literal(ast::Literal {
5744                         span: span(9..10),
5745                         kind: ast::LiteralKind::Verbatim,
5746                         c: 'z',
5747                     }),
5748                 ],
5749             }))
5750         );
5751         assert_eq!(
5752             parser(r"\p\{").parse().unwrap_err(),
5753             TestError {
5754                 span: span(2..3),
5755                 kind: ast::ErrorKind::UnicodeClassInvalid,
5756             }
5757         );
5758         assert_eq!(
5759             parser(r"\P\{").parse().unwrap_err(),
5760             TestError {
5761                 span: span(2..3),
5762                 kind: ast::ErrorKind::UnicodeClassInvalid,
5763             }
5764         );
5765     }
5766 
5767     #[test]
parse_perl_class()5768     fn parse_perl_class() {
5769         assert_eq!(
5770             parser(r"\d").parse_escape(),
5771             Ok(Primitive::Perl(ast::ClassPerl {
5772                 span: span(0..2),
5773                 kind: ast::ClassPerlKind::Digit,
5774                 negated: false,
5775             }))
5776         );
5777         assert_eq!(
5778             parser(r"\D").parse_escape(),
5779             Ok(Primitive::Perl(ast::ClassPerl {
5780                 span: span(0..2),
5781                 kind: ast::ClassPerlKind::Digit,
5782                 negated: true,
5783             }))
5784         );
5785         assert_eq!(
5786             parser(r"\s").parse_escape(),
5787             Ok(Primitive::Perl(ast::ClassPerl {
5788                 span: span(0..2),
5789                 kind: ast::ClassPerlKind::Space,
5790                 negated: false,
5791             }))
5792         );
5793         assert_eq!(
5794             parser(r"\S").parse_escape(),
5795             Ok(Primitive::Perl(ast::ClassPerl {
5796                 span: span(0..2),
5797                 kind: ast::ClassPerlKind::Space,
5798                 negated: true,
5799             }))
5800         );
5801         assert_eq!(
5802             parser(r"\w").parse_escape(),
5803             Ok(Primitive::Perl(ast::ClassPerl {
5804                 span: span(0..2),
5805                 kind: ast::ClassPerlKind::Word,
5806                 negated: false,
5807             }))
5808         );
5809         assert_eq!(
5810             parser(r"\W").parse_escape(),
5811             Ok(Primitive::Perl(ast::ClassPerl {
5812                 span: span(0..2),
5813                 kind: ast::ClassPerlKind::Word,
5814                 negated: true,
5815             }))
5816         );
5817 
5818         assert_eq!(
5819             parser(r"\d").parse(),
5820             Ok(Ast::Class(ast::Class::Perl(ast::ClassPerl {
5821                 span: span(0..2),
5822                 kind: ast::ClassPerlKind::Digit,
5823                 negated: false,
5824             })))
5825         );
5826         assert_eq!(
5827             parser(r"\dz").parse(),
5828             Ok(Ast::Concat(ast::Concat {
5829                 span: span(0..3),
5830                 asts: vec![
5831                     Ast::Class(ast::Class::Perl(ast::ClassPerl {
5832                         span: span(0..2),
5833                         kind: ast::ClassPerlKind::Digit,
5834                         negated: false,
5835                     })),
5836                     Ast::Literal(ast::Literal {
5837                         span: span(2..3),
5838                         kind: ast::LiteralKind::Verbatim,
5839                         c: 'z',
5840                     }),
5841                 ],
5842             }))
5843         );
5844     }
5845 
5846     // This tests a bug fix where the nest limit checker wasn't decrementing
5847     // its depth during post-traversal, which causes long regexes to trip
5848     // the default limit too aggressively.
5849     #[test]
regression_454_nest_too_big()5850     fn regression_454_nest_too_big() {
5851         let pattern = r#"
5852         2(?:
5853           [45]\d{3}|
5854           7(?:
5855             1[0-267]|
5856             2[0-289]|
5857             3[0-29]|
5858             4[01]|
5859             5[1-3]|
5860             6[013]|
5861             7[0178]|
5862             91
5863           )|
5864           8(?:
5865             0[125]|
5866             [139][1-6]|
5867             2[0157-9]|
5868             41|
5869             6[1-35]|
5870             7[1-5]|
5871             8[1-8]|
5872             90
5873           )|
5874           9(?:
5875             0[0-2]|
5876             1[0-4]|
5877             2[568]|
5878             3[3-6]|
5879             5[5-7]|
5880             6[0167]|
5881             7[15]|
5882             8[0146-9]
5883           )
5884         )\d{4}
5885         "#;
5886         assert!(parser_nest_limit(pattern, 50).parse().is_ok());
5887     }
5888 
5889     // This tests that we treat a trailing `-` in a character class as a
5890     // literal `-` even when whitespace mode is enabled and there is whitespace
5891     // after the trailing `-`.
5892     #[test]
regression_455_trailing_dash_ignore_whitespace()5893     fn regression_455_trailing_dash_ignore_whitespace() {
5894         assert!(parser("(?x)[ / - ]").parse().is_ok());
5895         assert!(parser("(?x)[ a - ]").parse().is_ok());
5896         assert!(parser(
5897             "(?x)[
5898             a
5899             - ]
5900         "
5901         )
5902         .parse()
5903         .is_ok());
5904         assert!(parser(
5905             "(?x)[
5906             a # wat
5907             - ]
5908         "
5909         )
5910         .parse()
5911         .is_ok());
5912 
5913         assert!(parser("(?x)[ / -").parse().is_err());
5914         assert!(parser("(?x)[ / - ").parse().is_err());
5915         assert!(parser(
5916             "(?x)[
5917             / -
5918         "
5919         )
5920         .parse()
5921         .is_err());
5922         assert!(parser(
5923             "(?x)[
5924             / - # wat
5925         "
5926         )
5927         .parse()
5928         .is_err());
5929     }
5930 }
5931