xref: /aosp_15_r20/external/cronet/third_party/rust/chromium_crates_io/vendor/regex-1.10.4/src/regexset/bytes.rs (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 use alloc::string::String;
2 
3 use regex_automata::{meta, Input, PatternID, PatternSet, PatternSetIter};
4 
5 use crate::{bytes::RegexSetBuilder, Error};
6 
7 /// Match multiple, possibly overlapping, regexes in a single search.
8 ///
9 /// A regex set corresponds to the union of zero or more regular expressions.
10 /// That is, a regex set will match a haystack when at least one of its
/// constituent regexes matches. A regex set as it is formulated here provides a
12 /// touch more power: it will also report *which* regular expressions in the
13 /// set match. Indeed, this is the key difference between regex sets and a
14 /// single `Regex` with many alternates, since only one alternate can match at
15 /// a time.
16 ///
17 /// For example, consider regular expressions to match email addresses and
18 /// domains: `[a-z]+@[a-z]+\.(com|org|net)` and `[a-z]+\.(com|org|net)`. If a
19 /// regex set is constructed from those regexes, then searching the haystack
/// `foo@example.com` will report both regexes as matching. Of course, one
21 /// could accomplish this by compiling each regex on its own and doing two
22 /// searches over the haystack. The key advantage of using a regex set is
23 /// that it will report the matching regexes using a *single pass through the
24 /// haystack*. If one has hundreds or thousands of regexes to match repeatedly
25 /// (like a URL router for a complex web application or a user agent matcher),
26 /// then a regex set *can* realize huge performance gains.
27 ///
28 /// Unlike the top-level [`RegexSet`](crate::RegexSet), this `RegexSet`
29 /// searches haystacks with type `&[u8]` instead of `&str`. Consequently, this
30 /// `RegexSet` is permitted to match invalid UTF-8.
31 ///
32 /// # Limitations
33 ///
34 /// Regex sets are limited to answering the following two questions:
35 ///
36 /// 1. Does any regex in the set match?
37 /// 2. If so, which regexes in the set match?
38 ///
39 /// As with the main [`Regex`][crate::bytes::Regex] type, it is cheaper to ask
40 /// (1) instead of (2) since the matching engines can stop after the first
41 /// match is found.
42 ///
43 /// You cannot directly extract [`Match`][crate::bytes::Match] or
44 /// [`Captures`][crate::bytes::Captures] objects from a regex set. If you need
45 /// these operations, the recommended approach is to compile each pattern in
46 /// the set independently and scan the exact same haystack a second time with
47 /// those independently compiled patterns:
48 ///
49 /// ```
50 /// use regex::bytes::{Regex, RegexSet};
51 ///
52 /// let patterns = ["foo", "bar"];
53 /// // Both patterns will match different ranges of this string.
54 /// let hay = b"barfoo";
55 ///
56 /// // Compile a set matching any of our patterns.
57 /// let set = RegexSet::new(patterns).unwrap();
58 /// // Compile each pattern independently.
59 /// let regexes: Vec<_> = set
60 ///     .patterns()
61 ///     .iter()
62 ///     .map(|pat| Regex::new(pat).unwrap())
63 ///     .collect();
64 ///
65 /// // Match against the whole set first and identify the individual
66 /// // matching patterns.
67 /// let matches: Vec<&[u8]> = set
68 ///     .matches(hay)
69 ///     .into_iter()
70 ///     // Dereference the match index to get the corresponding
71 ///     // compiled pattern.
72 ///     .map(|index| &regexes[index])
73 ///     // To get match locations or any other info, we then have to search the
74 ///     // exact same haystack again, using our separately-compiled pattern.
75 ///     .map(|re| re.find(hay).unwrap().as_bytes())
76 ///     .collect();
77 ///
78 /// // Matches arrive in the order the constituent patterns were declared,
79 /// // not the order they appear in the haystack.
80 /// assert_eq!(vec![&b"foo"[..], &b"bar"[..]], matches);
81 /// ```
82 ///
83 /// # Performance
84 ///
85 /// A `RegexSet` has the same performance characteristics as `Regex`. Namely,
86 /// search takes `O(m * n)` time, where `m` is proportional to the size of the
87 /// regex set and `n` is proportional to the length of the haystack.
88 ///
89 /// # Trait implementations
90 ///
91 /// The `Default` trait is implemented for `RegexSet`. The default value
92 /// is an empty set. An empty set can also be explicitly constructed via
93 /// [`RegexSet::empty`].
94 ///
95 /// # Example
96 ///
97 /// This shows how the above two regexes (for matching email addresses and
98 /// domains) might work:
99 ///
100 /// ```
101 /// use regex::bytes::RegexSet;
102 ///
103 /// let set = RegexSet::new(&[
104 ///     r"[a-z]+@[a-z]+\.(com|org|net)",
105 ///     r"[a-z]+\.(com|org|net)",
106 /// ]).unwrap();
107 ///
108 /// // Ask whether any regexes in the set match.
/// assert!(set.is_match(b"foo@example.com"));
110 ///
111 /// // Identify which regexes in the set match.
/// let matches: Vec<_> = set.matches(b"foo@example.com").into_iter().collect();
113 /// assert_eq!(vec![0, 1], matches);
114 ///
115 /// // Try again, but with a haystack that only matches one of the regexes.
116 /// let matches: Vec<_> = set.matches(b"example.com").into_iter().collect();
117 /// assert_eq!(vec![1], matches);
118 ///
119 /// // Try again, but with a haystack that doesn't match any regex in the set.
120 /// let matches: Vec<_> = set.matches(b"example").into_iter().collect();
121 /// assert!(matches.is_empty());
122 /// ```
123 ///
124 /// Note that it would be possible to adapt the above example to using `Regex`
125 /// with an expression like:
126 ///
127 /// ```text
128 /// (?P<email>[a-z]+@(?P<email_domain>[a-z]+[.](com|org|net)))|(?P<domain>[a-z]+[.](com|org|net))
129 /// ```
130 ///
131 /// After a match, one could then inspect the capture groups to figure out
132 /// which alternates matched. The problem is that it is hard to make this
133 /// approach scale when there are many regexes since the overlap between each
134 /// alternate isn't always obvious to reason about.
135 #[derive(Clone)]
136 pub struct RegexSet {
137     pub(crate) meta: meta::Regex,
138     pub(crate) patterns: alloc::sync::Arc<[String]>,
139 }
140 
141 impl RegexSet {
142     /// Create a new regex set with the given regular expressions.
143     ///
144     /// This takes an iterator of `S`, where `S` is something that can produce
145     /// a `&str`. If any of the strings in the iterator are not valid regular
146     /// expressions, then an error is returned.
147     ///
148     /// # Example
149     ///
150     /// Create a new regex set from an iterator of strings:
151     ///
152     /// ```
153     /// use regex::bytes::RegexSet;
154     ///
155     /// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap();
156     /// assert!(set.is_match(b"foo"));
157     /// ```
new<I, S>(exprs: I) -> Result<RegexSet, Error> where S: AsRef<str>, I: IntoIterator<Item = S>,158     pub fn new<I, S>(exprs: I) -> Result<RegexSet, Error>
159     where
160         S: AsRef<str>,
161         I: IntoIterator<Item = S>,
162     {
163         RegexSetBuilder::new(exprs).build()
164     }
165 
166     /// Create a new empty regex set.
167     ///
168     /// An empty regex never matches anything.
169     ///
170     /// This is a convenience function for `RegexSet::new([])`, but doesn't
171     /// require one to specify the type of the input.
172     ///
173     /// # Example
174     ///
175     /// ```
176     /// use regex::bytes::RegexSet;
177     ///
178     /// let set = RegexSet::empty();
179     /// assert!(set.is_empty());
180     /// // an empty set matches nothing
181     /// assert!(!set.is_match(b""));
182     /// ```
empty() -> RegexSet183     pub fn empty() -> RegexSet {
184         let empty: [&str; 0] = [];
185         RegexSetBuilder::new(empty).build().unwrap()
186     }
187 
188     /// Returns true if and only if one of the regexes in this set matches
189     /// the haystack given.
190     ///
191     /// This method should be preferred if you only need to test whether any
192     /// of the regexes in the set should match, but don't care about *which*
193     /// regexes matched. This is because the underlying matching engine will
194     /// quit immediately after seeing the first match instead of continuing to
195     /// find all matches.
196     ///
197     /// Note that as with searches using [`Regex`](crate::bytes::Regex), the
198     /// expression is unanchored by default. That is, if the regex does not
199     /// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted
200     /// to match anywhere in the haystack.
201     ///
202     /// # Example
203     ///
204     /// Tests whether a set matches somewhere in a haystack:
205     ///
206     /// ```
207     /// use regex::bytes::RegexSet;
208     ///
209     /// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap();
210     /// assert!(set.is_match(b"foo"));
211     /// assert!(!set.is_match("☃".as_bytes()));
212     /// ```
213     #[inline]
is_match(&self, haystack: &[u8]) -> bool214     pub fn is_match(&self, haystack: &[u8]) -> bool {
215         self.is_match_at(haystack, 0)
216     }
217 
218     /// Returns true if and only if one of the regexes in this set matches the
219     /// haystack given, with the search starting at the offset given.
220     ///
221     /// The significance of the starting point is that it takes the surrounding
222     /// context into consideration. For example, the `\A` anchor can only
223     /// match when `start == 0`.
224     ///
225     /// # Panics
226     ///
227     /// This panics when `start >= haystack.len() + 1`.
228     ///
229     /// # Example
230     ///
231     /// This example shows the significance of `start`. Namely, consider a
232     /// haystack `foobar` and a desire to execute a search starting at offset
233     /// `3`. You could search a substring explicitly, but then the look-around
234     /// assertions won't work correctly. Instead, you can use this method to
235     /// specify the start position of a search.
236     ///
237     /// ```
238     /// use regex::bytes::RegexSet;
239     ///
240     /// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap();
241     /// let hay = b"foobar";
242     /// // We get a match here, but it's probably not intended.
243     /// assert!(set.is_match(&hay[3..]));
244     /// // No match because the  assertions take the context into account.
245     /// assert!(!set.is_match_at(hay, 3));
246     /// ```
247     #[inline]
is_match_at(&self, haystack: &[u8], start: usize) -> bool248     pub fn is_match_at(&self, haystack: &[u8], start: usize) -> bool {
249         self.meta.is_match(Input::new(haystack).span(start..haystack.len()))
250     }
251 
252     /// Returns the set of regexes that match in the given haystack.
253     ///
254     /// The set returned contains the index of each regex that matches in
255     /// the given haystack. The index is in correspondence with the order of
256     /// regular expressions given to `RegexSet`'s constructor.
257     ///
258     /// The set can also be used to iterate over the matched indices. The order
259     /// of iteration is always ascending with respect to the matching indices.
260     ///
261     /// Note that as with searches using [`Regex`](crate::bytes::Regex), the
262     /// expression is unanchored by default. That is, if the regex does not
263     /// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted
264     /// to match anywhere in the haystack.
265     ///
266     /// # Example
267     ///
268     /// Tests which regular expressions match the given haystack:
269     ///
270     /// ```
271     /// use regex::bytes::RegexSet;
272     ///
273     /// let set = RegexSet::new([
274     ///     r"\w+",
275     ///     r"\d+",
276     ///     r"\pL+",
277     ///     r"foo",
278     ///     r"bar",
279     ///     r"barfoo",
280     ///     r"foobar",
281     /// ]).unwrap();
282     /// let matches: Vec<_> = set.matches(b"foobar").into_iter().collect();
283     /// assert_eq!(matches, vec![0, 2, 3, 4, 6]);
284     ///
285     /// // You can also test whether a particular regex matched:
286     /// let matches = set.matches(b"foobar");
287     /// assert!(!matches.matched(5));
288     /// assert!(matches.matched(6));
289     /// ```
290     #[inline]
matches(&self, haystack: &[u8]) -> SetMatches291     pub fn matches(&self, haystack: &[u8]) -> SetMatches {
292         self.matches_at(haystack, 0)
293     }
294 
295     /// Returns the set of regexes that match in the given haystack.
296     ///
297     /// The set returned contains the index of each regex that matches in
298     /// the given haystack. The index is in correspondence with the order of
299     /// regular expressions given to `RegexSet`'s constructor.
300     ///
301     /// The set can also be used to iterate over the matched indices. The order
302     /// of iteration is always ascending with respect to the matching indices.
303     ///
304     /// The significance of the starting point is that it takes the surrounding
305     /// context into consideration. For example, the `\A` anchor can only
306     /// match when `start == 0`.
307     ///
308     /// # Panics
309     ///
310     /// This panics when `start >= haystack.len() + 1`.
311     ///
312     /// # Example
313     ///
314     /// Tests which regular expressions match the given haystack:
315     ///
316     /// ```
317     /// use regex::bytes::RegexSet;
318     ///
319     /// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap();
320     /// let hay = b"foobar";
321     /// // We get matches here, but it's probably not intended.
322     /// let matches: Vec<_> = set.matches(&hay[3..]).into_iter().collect();
323     /// assert_eq!(matches, vec![0, 1]);
324     /// // No matches because the  assertions take the context into account.
325     /// let matches: Vec<_> = set.matches_at(hay, 3).into_iter().collect();
326     /// assert_eq!(matches, vec![]);
327     /// ```
328     #[inline]
matches_at(&self, haystack: &[u8], start: usize) -> SetMatches329     pub fn matches_at(&self, haystack: &[u8], start: usize) -> SetMatches {
330         let input = Input::new(haystack).span(start..haystack.len());
331         let mut patset = PatternSet::new(self.meta.pattern_len());
332         self.meta.which_overlapping_matches(&input, &mut patset);
333         SetMatches(patset)
334     }
335 
336     /// Returns the same as matches, but starts the search at the given
337     /// offset and stores the matches into the slice given.
338     ///
339     /// The significance of the starting point is that it takes the surrounding
340     /// context into consideration. For example, the `\A` anchor can only
341     /// match when `start == 0`.
342     ///
343     /// `matches` must have a length that is at least the number of regexes
344     /// in this set.
345     ///
346     /// This method returns true if and only if at least one member of
347     /// `matches` is true after executing the set against `haystack`.
348     #[doc(hidden)]
349     #[inline]
matches_read_at( &self, matches: &mut [bool], haystack: &[u8], start: usize, ) -> bool350     pub fn matches_read_at(
351         &self,
352         matches: &mut [bool],
353         haystack: &[u8],
354         start: usize,
355     ) -> bool {
356         // This is pretty dumb. We should try to fix this, but the
357         // regex-automata API doesn't provide a way to store matches in an
358         // arbitrary &mut [bool]. Thankfully, this API is is doc(hidden) and
359         // thus not public... But regex-capi currently uses it. We should
360         // fix regex-capi to use a PatternSet, maybe? Not sure... PatternSet
361         // is in regex-automata, not regex. So maybe we should just accept a
362         // 'SetMatches', which is basically just a newtype around PatternSet.
363         let mut patset = PatternSet::new(self.meta.pattern_len());
364         let mut input = Input::new(haystack);
365         input.set_start(start);
366         self.meta.which_overlapping_matches(&input, &mut patset);
367         for pid in patset.iter() {
368             matches[pid] = true;
369         }
370         !patset.is_empty()
371     }
372 
373     /// An alias for `matches_read_at` to preserve backward compatibility.
374     ///
375     /// The `regex-capi` crate used this method, so to avoid breaking that
376     /// crate, we continue to export it as an undocumented API.
377     #[doc(hidden)]
378     #[inline]
read_matches_at( &self, matches: &mut [bool], haystack: &[u8], start: usize, ) -> bool379     pub fn read_matches_at(
380         &self,
381         matches: &mut [bool],
382         haystack: &[u8],
383         start: usize,
384     ) -> bool {
385         self.matches_read_at(matches, haystack, start)
386     }
387 
388     /// Returns the total number of regexes in this set.
389     ///
390     /// # Example
391     ///
392     /// ```
393     /// use regex::bytes::RegexSet;
394     ///
395     /// assert_eq!(0, RegexSet::empty().len());
396     /// assert_eq!(1, RegexSet::new([r"[0-9]"]).unwrap().len());
397     /// assert_eq!(2, RegexSet::new([r"[0-9]", r"[a-z]"]).unwrap().len());
398     /// ```
399     #[inline]
len(&self) -> usize400     pub fn len(&self) -> usize {
401         self.meta.pattern_len()
402     }
403 
404     /// Returns `true` if this set contains no regexes.
405     ///
406     /// # Example
407     ///
408     /// ```
409     /// use regex::bytes::RegexSet;
410     ///
411     /// assert!(RegexSet::empty().is_empty());
412     /// assert!(!RegexSet::new([r"[0-9]"]).unwrap().is_empty());
413     /// ```
414     #[inline]
is_empty(&self) -> bool415     pub fn is_empty(&self) -> bool {
416         self.meta.pattern_len() == 0
417     }
418 
419     /// Returns the regex patterns that this regex set was constructed from.
420     ///
421     /// This function can be used to determine the pattern for a match. The
422     /// slice returned has exactly as many patterns givens to this regex set,
423     /// and the order of the slice is the same as the order of the patterns
424     /// provided to the set.
425     ///
426     /// # Example
427     ///
428     /// ```
429     /// use regex::bytes::RegexSet;
430     ///
431     /// let set = RegexSet::new(&[
432     ///     r"\w+",
433     ///     r"\d+",
434     ///     r"\pL+",
435     ///     r"foo",
436     ///     r"bar",
437     ///     r"barfoo",
438     ///     r"foobar",
439     /// ]).unwrap();
440     /// let matches: Vec<_> = set
441     ///     .matches(b"foobar")
442     ///     .into_iter()
443     ///     .map(|index| &set.patterns()[index])
444     ///     .collect();
445     /// assert_eq!(matches, vec![r"\w+", r"\pL+", r"foo", r"bar", r"foobar"]);
446     /// ```
447     #[inline]
patterns(&self) -> &[String]448     pub fn patterns(&self) -> &[String] {
449         &self.patterns
450     }
451 }
452 
453 impl Default for RegexSet {
default() -> Self454     fn default() -> Self {
455         RegexSet::empty()
456     }
457 }
458 
459 /// A set of matches returned by a regex set.
460 ///
461 /// Values of this type are constructed by [`RegexSet::matches`].
462 #[derive(Clone, Debug)]
463 pub struct SetMatches(PatternSet);
464 
465 impl SetMatches {
466     /// Whether this set contains any matches.
467     ///
468     /// # Example
469     ///
470     /// ```
471     /// use regex::bytes::RegexSet;
472     ///
473     /// let set = RegexSet::new(&[
474     ///     r"[a-z]+@[a-z]+\.(com|org|net)",
475     ///     r"[a-z]+\.(com|org|net)",
476     /// ]).unwrap();
477     /// let matches = set.matches(b"[email protected]");
478     /// assert!(matches.matched_any());
479     /// ```
480     #[inline]
matched_any(&self) -> bool481     pub fn matched_any(&self) -> bool {
482         !self.0.is_empty()
483     }
484 
485     /// Whether the regex at the given index matched.
486     ///
487     /// The index for a regex is determined by its insertion order upon the
488     /// initial construction of a `RegexSet`, starting at `0`.
489     ///
490     /// # Panics
491     ///
492     /// If `index` is greater than or equal to the number of regexes in the
493     /// original set that produced these matches. Equivalently, when `index`
494     /// is greater than or equal to [`SetMatches::len`].
495     ///
496     /// # Example
497     ///
498     /// ```
499     /// use regex::bytes::RegexSet;
500     ///
501     /// let set = RegexSet::new([
502     ///     r"[a-z]+@[a-z]+\.(com|org|net)",
503     ///     r"[a-z]+\.(com|org|net)",
504     /// ]).unwrap();
505     /// let matches = set.matches(b"example.com");
506     /// assert!(!matches.matched(0));
507     /// assert!(matches.matched(1));
508     /// ```
509     #[inline]
matched(&self, index: usize) -> bool510     pub fn matched(&self, index: usize) -> bool {
511         self.0.contains(PatternID::new_unchecked(index))
512     }
513 
514     /// The total number of regexes in the set that created these matches.
515     ///
516     /// **WARNING:** This always returns the same value as [`RegexSet::len`].
517     /// In particular, it does *not* return the number of elements yielded by
518     /// [`SetMatches::iter`]. The only way to determine the total number of
519     /// matched regexes is to iterate over them.
520     ///
521     /// # Example
522     ///
523     /// Notice that this method returns the total number of regexes in the
524     /// original set, and *not* the total number of regexes that matched.
525     ///
526     /// ```
527     /// use regex::bytes::RegexSet;
528     ///
529     /// let set = RegexSet::new([
530     ///     r"[a-z]+@[a-z]+\.(com|org|net)",
531     ///     r"[a-z]+\.(com|org|net)",
532     /// ]).unwrap();
533     /// let matches = set.matches(b"example.com");
534     /// // Total number of patterns that matched.
535     /// assert_eq!(1, matches.iter().count());
536     /// // Total number of patterns in the set.
537     /// assert_eq!(2, matches.len());
538     /// ```
539     #[inline]
len(&self) -> usize540     pub fn len(&self) -> usize {
541         self.0.capacity()
542     }
543 
544     /// Returns an iterator over the indices of the regexes that matched.
545     ///
546     /// This will always produces matches in ascending order, where the index
547     /// yielded corresponds to the index of the regex that matched with respect
548     /// to its position when initially building the set.
549     ///
550     /// # Example
551     ///
552     /// ```
553     /// use regex::bytes::RegexSet;
554     ///
555     /// let set = RegexSet::new([
556     ///     r"[0-9]",
557     ///     r"[a-z]",
558     ///     r"[A-Z]",
559     ///     r"\p{Greek}",
560     /// ]).unwrap();
561     /// let hay = "βa1".as_bytes();
562     /// let matches: Vec<_> = set.matches(hay).iter().collect();
563     /// assert_eq!(matches, vec![0, 1, 3]);
564     /// ```
565     ///
566     /// Note that `SetMatches` also implemnets the `IntoIterator` trait, so
567     /// this method is not always needed. For example:
568     ///
569     /// ```
570     /// use regex::bytes::RegexSet;
571     ///
572     /// let set = RegexSet::new([
573     ///     r"[0-9]",
574     ///     r"[a-z]",
575     ///     r"[A-Z]",
576     ///     r"\p{Greek}",
577     /// ]).unwrap();
578     /// let hay = "βa1".as_bytes();
579     /// let mut matches = vec![];
580     /// for index in set.matches(hay) {
581     ///     matches.push(index);
582     /// }
583     /// assert_eq!(matches, vec![0, 1, 3]);
584     /// ```
585     #[inline]
iter(&self) -> SetMatchesIter<'_>586     pub fn iter(&self) -> SetMatchesIter<'_> {
587         SetMatchesIter(self.0.iter())
588     }
589 }
590 
591 impl IntoIterator for SetMatches {
592     type IntoIter = SetMatchesIntoIter;
593     type Item = usize;
594 
into_iter(self) -> Self::IntoIter595     fn into_iter(self) -> Self::IntoIter {
596         let it = 0..self.0.capacity();
597         SetMatchesIntoIter { patset: self.0, it }
598     }
599 }
600 
601 impl<'a> IntoIterator for &'a SetMatches {
602     type IntoIter = SetMatchesIter<'a>;
603     type Item = usize;
604 
into_iter(self) -> Self::IntoIter605     fn into_iter(self) -> Self::IntoIter {
606         self.iter()
607     }
608 }
609 
610 /// An owned iterator over the set of matches from a regex set.
611 ///
612 /// This will always produces matches in ascending order of index, where the
613 /// index corresponds to the index of the regex that matched with respect to
614 /// its position when initially building the set.
615 ///
616 /// This iterator is created by calling `SetMatches::into_iter` via the
617 /// `IntoIterator` trait. This is automatically done in `for` loops.
618 ///
619 /// # Example
620 ///
621 /// ```
622 /// use regex::bytes::RegexSet;
623 ///
624 /// let set = RegexSet::new([
625 ///     r"[0-9]",
626 ///     r"[a-z]",
627 ///     r"[A-Z]",
628 ///     r"\p{Greek}",
629 /// ]).unwrap();
630 /// let hay = "βa1".as_bytes();
631 /// let mut matches = vec![];
632 /// for index in set.matches(hay) {
633 ///     matches.push(index);
634 /// }
635 /// assert_eq!(matches, vec![0, 1, 3]);
636 /// ```
637 #[derive(Debug)]
638 pub struct SetMatchesIntoIter {
639     patset: PatternSet,
640     it: core::ops::Range<usize>,
641 }
642 
643 impl Iterator for SetMatchesIntoIter {
644     type Item = usize;
645 
next(&mut self) -> Option<usize>646     fn next(&mut self) -> Option<usize> {
647         loop {
648             let id = self.it.next()?;
649             if self.patset.contains(PatternID::new_unchecked(id)) {
650                 return Some(id);
651             }
652         }
653     }
654 
size_hint(&self) -> (usize, Option<usize>)655     fn size_hint(&self) -> (usize, Option<usize>) {
656         self.it.size_hint()
657     }
658 }
659 
660 impl DoubleEndedIterator for SetMatchesIntoIter {
next_back(&mut self) -> Option<usize>661     fn next_back(&mut self) -> Option<usize> {
662         loop {
663             let id = self.it.next_back()?;
664             if self.patset.contains(PatternID::new_unchecked(id)) {
665                 return Some(id);
666             }
667         }
668     }
669 }
670 
671 impl core::iter::FusedIterator for SetMatchesIntoIter {}
672 
673 /// A borrowed iterator over the set of matches from a regex set.
674 ///
675 /// The lifetime `'a` refers to the lifetime of the [`SetMatches`] value that
676 /// created this iterator.
677 ///
678 /// This will always produces matches in ascending order, where the index
679 /// corresponds to the index of the regex that matched with respect to its
680 /// position when initially building the set.
681 ///
682 /// This iterator is created by the [`SetMatches::iter`] method.
683 #[derive(Clone, Debug)]
684 pub struct SetMatchesIter<'a>(PatternSetIter<'a>);
685 
686 impl<'a> Iterator for SetMatchesIter<'a> {
687     type Item = usize;
688 
next(&mut self) -> Option<usize>689     fn next(&mut self) -> Option<usize> {
690         self.0.next().map(|pid| pid.as_usize())
691     }
692 
size_hint(&self) -> (usize, Option<usize>)693     fn size_hint(&self) -> (usize, Option<usize>) {
694         self.0.size_hint()
695     }
696 }
697 
698 impl<'a> DoubleEndedIterator for SetMatchesIter<'a> {
next_back(&mut self) -> Option<usize>699     fn next_back(&mut self) -> Option<usize> {
700         self.0.next_back().map(|pid| pid.as_usize())
701     }
702 }
703 
704 impl<'a> core::iter::FusedIterator for SetMatchesIter<'a> {}
705 
706 impl core::fmt::Debug for RegexSet {
fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result707     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
708         write!(f, "RegexSet({:?})", self.patterns())
709     }
710 }
711