1 use alloc::string::String; 2 3 use regex_automata::{meta, Input, PatternID, PatternSet, PatternSetIter}; 4 5 use crate::{bytes::RegexSetBuilder, Error}; 6 7 /// Match multiple, possibly overlapping, regexes in a single search. 8 /// 9 /// A regex set corresponds to the union of zero or more regular expressions. 10 /// That is, a regex set will match a haystack when at least one of its 11 /// constituent regexes matches. A regex set as its formulated here provides a 12 /// touch more power: it will also report *which* regular expressions in the 13 /// set match. Indeed, this is the key difference between regex sets and a 14 /// single `Regex` with many alternates, since only one alternate can match at 15 /// a time. 16 /// 17 /// For example, consider regular expressions to match email addresses and 18 /// domains: `[a-z]+@[a-z]+\.(com|org|net)` and `[a-z]+\.(com|org|net)`. If a 19 /// regex set is constructed from those regexes, then searching the haystack 20 /// `[email protected]` will report both regexes as matching. Of course, one 21 /// could accomplish this by compiling each regex on its own and doing two 22 /// searches over the haystack. The key advantage of using a regex set is 23 /// that it will report the matching regexes using a *single pass through the 24 /// haystack*. If one has hundreds or thousands of regexes to match repeatedly 25 /// (like a URL router for a complex web application or a user agent matcher), 26 /// then a regex set *can* realize huge performance gains. 27 /// 28 /// Unlike the top-level [`RegexSet`](crate::RegexSet), this `RegexSet` 29 /// searches haystacks with type `&[u8]` instead of `&str`. Consequently, this 30 /// `RegexSet` is permitted to match invalid UTF-8. 31 /// 32 /// # Limitations 33 /// 34 /// Regex sets are limited to answering the following two questions: 35 /// 36 /// 1. Does any regex in the set match? 37 /// 2. If so, which regexes in the set match? 38 /// 39 /// As with the main [`Regex`][crate::bytes::Regex] type, it is cheaper to ask 40 /// (1) instead of (2) since the matching engines can stop after the first 41 /// match is found. 42 /// 43 /// You cannot directly extract [`Match`][crate::bytes::Match] or 44 /// [`Captures`][crate::bytes::Captures] objects from a regex set. If you need 45 /// these operations, the recommended approach is to compile each pattern in 46 /// the set independently and scan the exact same haystack a second time with 47 /// those independently compiled patterns: 48 /// 49 /// ``` 50 /// use regex::bytes::{Regex, RegexSet}; 51 /// 52 /// let patterns = ["foo", "bar"]; 53 /// // Both patterns will match different ranges of this string. 54 /// let hay = b"barfoo"; 55 /// 56 /// // Compile a set matching any of our patterns. 57 /// let set = RegexSet::new(patterns).unwrap(); 58 /// // Compile each pattern independently. 59 /// let regexes: Vec<_> = set 60 /// .patterns() 61 /// .iter() 62 /// .map(|pat| Regex::new(pat).unwrap()) 63 /// .collect(); 64 /// 65 /// // Match against the whole set first and identify the individual 66 /// // matching patterns. 67 /// let matches: Vec<&[u8]> = set 68 /// .matches(hay) 69 /// .into_iter() 70 /// // Dereference the match index to get the corresponding 71 /// // compiled pattern. 72 /// .map(|index| ®exes[index]) 73 /// // To get match locations or any other info, we then have to search the 74 /// // exact same haystack again, using our separately-compiled pattern. 75 /// .map(|re| re.find(hay).unwrap().as_bytes()) 76 /// .collect(); 77 /// 78 /// // Matches arrive in the order the constituent patterns were declared, 79 /// // not the order they appear in the haystack. 80 /// assert_eq!(vec![&b"foo"[..], &b"bar"[..]], matches); 81 /// ``` 82 /// 83 /// # Performance 84 /// 85 /// A `RegexSet` has the same performance characteristics as `Regex`. Namely, 86 /// search takes `O(m * n)` time, where `m` is proportional to the size of the 87 /// regex set and `n` is proportional to the length of the haystack. 88 /// 89 /// # Trait implementations 90 /// 91 /// The `Default` trait is implemented for `RegexSet`. The default value 92 /// is an empty set. An empty set can also be explicitly constructed via 93 /// [`RegexSet::empty`]. 94 /// 95 /// # Example 96 /// 97 /// This shows how the above two regexes (for matching email addresses and 98 /// domains) might work: 99 /// 100 /// ``` 101 /// use regex::bytes::RegexSet; 102 /// 103 /// let set = RegexSet::new(&[ 104 /// r"[a-z]+@[a-z]+\.(com|org|net)", 105 /// r"[a-z]+\.(com|org|net)", 106 /// ]).unwrap(); 107 /// 108 /// // Ask whether any regexes in the set match. 109 /// assert!(set.is_match(b"[email protected]")); 110 /// 111 /// // Identify which regexes in the set match. 112 /// let matches: Vec<_> = set.matches(b"[email protected]").into_iter().collect(); 113 /// assert_eq!(vec![0, 1], matches); 114 /// 115 /// // Try again, but with a haystack that only matches one of the regexes. 116 /// let matches: Vec<_> = set.matches(b"example.com").into_iter().collect(); 117 /// assert_eq!(vec![1], matches); 118 /// 119 /// // Try again, but with a haystack that doesn't match any regex in the set. 120 /// let matches: Vec<_> = set.matches(b"example").into_iter().collect(); 121 /// assert!(matches.is_empty()); 122 /// ``` 123 /// 124 /// Note that it would be possible to adapt the above example to using `Regex` 125 /// with an expression like: 126 /// 127 /// ```text 128 /// (?P<email>[a-z]+@(?P<email_domain>[a-z]+[.](com|org|net)))|(?P<domain>[a-z]+[.](com|org|net)) 129 /// ``` 130 /// 131 /// After a match, one could then inspect the capture groups to figure out 132 /// which alternates matched. The problem is that it is hard to make this 133 /// approach scale when there are many regexes since the overlap between each 134 /// alternate isn't always obvious to reason about. 135 #[derive(Clone)] 136 pub struct RegexSet { 137 pub(crate) meta: meta::Regex, 138 pub(crate) patterns: alloc::sync::Arc<[String]>, 139 } 140 141 impl RegexSet { 142 /// Create a new regex set with the given regular expressions. 143 /// 144 /// This takes an iterator of `S`, where `S` is something that can produce 145 /// a `&str`. If any of the strings in the iterator are not valid regular 146 /// expressions, then an error is returned. 147 /// 148 /// # Example 149 /// 150 /// Create a new regex set from an iterator of strings: 151 /// 152 /// ``` 153 /// use regex::bytes::RegexSet; 154 /// 155 /// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap(); 156 /// assert!(set.is_match(b"foo")); 157 /// ``` new<I, S>(exprs: I) -> Result<RegexSet, Error> where S: AsRef<str>, I: IntoIterator<Item = S>,158 pub fn new<I, S>(exprs: I) -> Result<RegexSet, Error> 159 where 160 S: AsRef<str>, 161 I: IntoIterator<Item = S>, 162 { 163 RegexSetBuilder::new(exprs).build() 164 } 165 166 /// Create a new empty regex set. 167 /// 168 /// An empty regex never matches anything. 169 /// 170 /// This is a convenience function for `RegexSet::new([])`, but doesn't 171 /// require one to specify the type of the input. 172 /// 173 /// # Example 174 /// 175 /// ``` 176 /// use regex::bytes::RegexSet; 177 /// 178 /// let set = RegexSet::empty(); 179 /// assert!(set.is_empty()); 180 /// // an empty set matches nothing 181 /// assert!(!set.is_match(b"")); 182 /// ``` empty() -> RegexSet183 pub fn empty() -> RegexSet { 184 let empty: [&str; 0] = []; 185 RegexSetBuilder::new(empty).build().unwrap() 186 } 187 188 /// Returns true if and only if one of the regexes in this set matches 189 /// the haystack given. 190 /// 191 /// This method should be preferred if you only need to test whether any 192 /// of the regexes in the set should match, but don't care about *which* 193 /// regexes matched. This is because the underlying matching engine will 194 /// quit immediately after seeing the first match instead of continuing to 195 /// find all matches. 196 /// 197 /// Note that as with searches using [`Regex`](crate::bytes::Regex), the 198 /// expression is unanchored by default. That is, if the regex does not 199 /// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted 200 /// to match anywhere in the haystack. 201 /// 202 /// # Example 203 /// 204 /// Tests whether a set matches somewhere in a haystack: 205 /// 206 /// ``` 207 /// use regex::bytes::RegexSet; 208 /// 209 /// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap(); 210 /// assert!(set.is_match(b"foo")); 211 /// assert!(!set.is_match("☃".as_bytes())); 212 /// ``` 213 #[inline] is_match(&self, haystack: &[u8]) -> bool214 pub fn is_match(&self, haystack: &[u8]) -> bool { 215 self.is_match_at(haystack, 0) 216 } 217 218 /// Returns true if and only if one of the regexes in this set matches the 219 /// haystack given, with the search starting at the offset given. 220 /// 221 /// The significance of the starting point is that it takes the surrounding 222 /// context into consideration. For example, the `\A` anchor can only 223 /// match when `start == 0`. 224 /// 225 /// # Panics 226 /// 227 /// This panics when `start >= haystack.len() + 1`. 228 /// 229 /// # Example 230 /// 231 /// This example shows the significance of `start`. Namely, consider a 232 /// haystack `foobar` and a desire to execute a search starting at offset 233 /// `3`. You could search a substring explicitly, but then the look-around 234 /// assertions won't work correctly. Instead, you can use this method to 235 /// specify the start position of a search. 236 /// 237 /// ``` 238 /// use regex::bytes::RegexSet; 239 /// 240 /// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap(); 241 /// let hay = b"foobar"; 242 /// // We get a match here, but it's probably not intended. 243 /// assert!(set.is_match(&hay[3..])); 244 /// // No match because the assertions take the context into account. 245 /// assert!(!set.is_match_at(hay, 3)); 246 /// ``` 247 #[inline] is_match_at(&self, haystack: &[u8], start: usize) -> bool248 pub fn is_match_at(&self, haystack: &[u8], start: usize) -> bool { 249 self.meta.is_match(Input::new(haystack).span(start..haystack.len())) 250 } 251 252 /// Returns the set of regexes that match in the given haystack. 253 /// 254 /// The set returned contains the index of each regex that matches in 255 /// the given haystack. The index is in correspondence with the order of 256 /// regular expressions given to `RegexSet`'s constructor. 257 /// 258 /// The set can also be used to iterate over the matched indices. The order 259 /// of iteration is always ascending with respect to the matching indices. 260 /// 261 /// Note that as with searches using [`Regex`](crate::bytes::Regex), the 262 /// expression is unanchored by default. That is, if the regex does not 263 /// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted 264 /// to match anywhere in the haystack. 265 /// 266 /// # Example 267 /// 268 /// Tests which regular expressions match the given haystack: 269 /// 270 /// ``` 271 /// use regex::bytes::RegexSet; 272 /// 273 /// let set = RegexSet::new([ 274 /// r"\w+", 275 /// r"\d+", 276 /// r"\pL+", 277 /// r"foo", 278 /// r"bar", 279 /// r"barfoo", 280 /// r"foobar", 281 /// ]).unwrap(); 282 /// let matches: Vec<_> = set.matches(b"foobar").into_iter().collect(); 283 /// assert_eq!(matches, vec![0, 2, 3, 4, 6]); 284 /// 285 /// // You can also test whether a particular regex matched: 286 /// let matches = set.matches(b"foobar"); 287 /// assert!(!matches.matched(5)); 288 /// assert!(matches.matched(6)); 289 /// ``` 290 #[inline] matches(&self, haystack: &[u8]) -> SetMatches291 pub fn matches(&self, haystack: &[u8]) -> SetMatches { 292 self.matches_at(haystack, 0) 293 } 294 295 /// Returns the set of regexes that match in the given haystack. 296 /// 297 /// The set returned contains the index of each regex that matches in 298 /// the given haystack. The index is in correspondence with the order of 299 /// regular expressions given to `RegexSet`'s constructor. 300 /// 301 /// The set can also be used to iterate over the matched indices. The order 302 /// of iteration is always ascending with respect to the matching indices. 303 /// 304 /// The significance of the starting point is that it takes the surrounding 305 /// context into consideration. For example, the `\A` anchor can only 306 /// match when `start == 0`. 307 /// 308 /// # Panics 309 /// 310 /// This panics when `start >= haystack.len() + 1`. 311 /// 312 /// # Example 313 /// 314 /// Tests which regular expressions match the given haystack: 315 /// 316 /// ``` 317 /// use regex::bytes::RegexSet; 318 /// 319 /// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap(); 320 /// let hay = b"foobar"; 321 /// // We get matches here, but it's probably not intended. 322 /// let matches: Vec<_> = set.matches(&hay[3..]).into_iter().collect(); 323 /// assert_eq!(matches, vec![0, 1]); 324 /// // No matches because the assertions take the context into account. 325 /// let matches: Vec<_> = set.matches_at(hay, 3).into_iter().collect(); 326 /// assert_eq!(matches, vec![]); 327 /// ``` 328 #[inline] matches_at(&self, haystack: &[u8], start: usize) -> SetMatches329 pub fn matches_at(&self, haystack: &[u8], start: usize) -> SetMatches { 330 let input = Input::new(haystack).span(start..haystack.len()); 331 let mut patset = PatternSet::new(self.meta.pattern_len()); 332 self.meta.which_overlapping_matches(&input, &mut patset); 333 SetMatches(patset) 334 } 335 336 /// Returns the same as matches, but starts the search at the given 337 /// offset and stores the matches into the slice given. 338 /// 339 /// The significance of the starting point is that it takes the surrounding 340 /// context into consideration. For example, the `\A` anchor can only 341 /// match when `start == 0`. 342 /// 343 /// `matches` must have a length that is at least the number of regexes 344 /// in this set. 345 /// 346 /// This method returns true if and only if at least one member of 347 /// `matches` is true after executing the set against `haystack`. 348 #[doc(hidden)] 349 #[inline] matches_read_at( &self, matches: &mut [bool], haystack: &[u8], start: usize, ) -> bool350 pub fn matches_read_at( 351 &self, 352 matches: &mut [bool], 353 haystack: &[u8], 354 start: usize, 355 ) -> bool { 356 // This is pretty dumb. We should try to fix this, but the 357 // regex-automata API doesn't provide a way to store matches in an 358 // arbitrary &mut [bool]. Thankfully, this API is is doc(hidden) and 359 // thus not public... But regex-capi currently uses it. We should 360 // fix regex-capi to use a PatternSet, maybe? Not sure... PatternSet 361 // is in regex-automata, not regex. So maybe we should just accept a 362 // 'SetMatches', which is basically just a newtype around PatternSet. 363 let mut patset = PatternSet::new(self.meta.pattern_len()); 364 let mut input = Input::new(haystack); 365 input.set_start(start); 366 self.meta.which_overlapping_matches(&input, &mut patset); 367 for pid in patset.iter() { 368 matches[pid] = true; 369 } 370 !patset.is_empty() 371 } 372 373 /// An alias for `matches_read_at` to preserve backward compatibility. 374 /// 375 /// The `regex-capi` crate used this method, so to avoid breaking that 376 /// crate, we continue to export it as an undocumented API. 377 #[doc(hidden)] 378 #[inline] read_matches_at( &self, matches: &mut [bool], haystack: &[u8], start: usize, ) -> bool379 pub fn read_matches_at( 380 &self, 381 matches: &mut [bool], 382 haystack: &[u8], 383 start: usize, 384 ) -> bool { 385 self.matches_read_at(matches, haystack, start) 386 } 387 388 /// Returns the total number of regexes in this set. 389 /// 390 /// # Example 391 /// 392 /// ``` 393 /// use regex::bytes::RegexSet; 394 /// 395 /// assert_eq!(0, RegexSet::empty().len()); 396 /// assert_eq!(1, RegexSet::new([r"[0-9]"]).unwrap().len()); 397 /// assert_eq!(2, RegexSet::new([r"[0-9]", r"[a-z]"]).unwrap().len()); 398 /// ``` 399 #[inline] len(&self) -> usize400 pub fn len(&self) -> usize { 401 self.meta.pattern_len() 402 } 403 404 /// Returns `true` if this set contains no regexes. 405 /// 406 /// # Example 407 /// 408 /// ``` 409 /// use regex::bytes::RegexSet; 410 /// 411 /// assert!(RegexSet::empty().is_empty()); 412 /// assert!(!RegexSet::new([r"[0-9]"]).unwrap().is_empty()); 413 /// ``` 414 #[inline] is_empty(&self) -> bool415 pub fn is_empty(&self) -> bool { 416 self.meta.pattern_len() == 0 417 } 418 419 /// Returns the regex patterns that this regex set was constructed from. 420 /// 421 /// This function can be used to determine the pattern for a match. The 422 /// slice returned has exactly as many patterns givens to this regex set, 423 /// and the order of the slice is the same as the order of the patterns 424 /// provided to the set. 425 /// 426 /// # Example 427 /// 428 /// ``` 429 /// use regex::bytes::RegexSet; 430 /// 431 /// let set = RegexSet::new(&[ 432 /// r"\w+", 433 /// r"\d+", 434 /// r"\pL+", 435 /// r"foo", 436 /// r"bar", 437 /// r"barfoo", 438 /// r"foobar", 439 /// ]).unwrap(); 440 /// let matches: Vec<_> = set 441 /// .matches(b"foobar") 442 /// .into_iter() 443 /// .map(|index| &set.patterns()[index]) 444 /// .collect(); 445 /// assert_eq!(matches, vec![r"\w+", r"\pL+", r"foo", r"bar", r"foobar"]); 446 /// ``` 447 #[inline] patterns(&self) -> &[String]448 pub fn patterns(&self) -> &[String] { 449 &self.patterns 450 } 451 } 452 453 impl Default for RegexSet { default() -> Self454 fn default() -> Self { 455 RegexSet::empty() 456 } 457 } 458 459 /// A set of matches returned by a regex set. 460 /// 461 /// Values of this type are constructed by [`RegexSet::matches`]. 462 #[derive(Clone, Debug)] 463 pub struct SetMatches(PatternSet); 464 465 impl SetMatches { 466 /// Whether this set contains any matches. 467 /// 468 /// # Example 469 /// 470 /// ``` 471 /// use regex::bytes::RegexSet; 472 /// 473 /// let set = RegexSet::new(&[ 474 /// r"[a-z]+@[a-z]+\.(com|org|net)", 475 /// r"[a-z]+\.(com|org|net)", 476 /// ]).unwrap(); 477 /// let matches = set.matches(b"[email protected]"); 478 /// assert!(matches.matched_any()); 479 /// ``` 480 #[inline] matched_any(&self) -> bool481 pub fn matched_any(&self) -> bool { 482 !self.0.is_empty() 483 } 484 485 /// Whether the regex at the given index matched. 486 /// 487 /// The index for a regex is determined by its insertion order upon the 488 /// initial construction of a `RegexSet`, starting at `0`. 489 /// 490 /// # Panics 491 /// 492 /// If `index` is greater than or equal to the number of regexes in the 493 /// original set that produced these matches. Equivalently, when `index` 494 /// is greater than or equal to [`SetMatches::len`]. 495 /// 496 /// # Example 497 /// 498 /// ``` 499 /// use regex::bytes::RegexSet; 500 /// 501 /// let set = RegexSet::new([ 502 /// r"[a-z]+@[a-z]+\.(com|org|net)", 503 /// r"[a-z]+\.(com|org|net)", 504 /// ]).unwrap(); 505 /// let matches = set.matches(b"example.com"); 506 /// assert!(!matches.matched(0)); 507 /// assert!(matches.matched(1)); 508 /// ``` 509 #[inline] matched(&self, index: usize) -> bool510 pub fn matched(&self, index: usize) -> bool { 511 self.0.contains(PatternID::new_unchecked(index)) 512 } 513 514 /// The total number of regexes in the set that created these matches. 515 /// 516 /// **WARNING:** This always returns the same value as [`RegexSet::len`]. 517 /// In particular, it does *not* return the number of elements yielded by 518 /// [`SetMatches::iter`]. The only way to determine the total number of 519 /// matched regexes is to iterate over them. 520 /// 521 /// # Example 522 /// 523 /// Notice that this method returns the total number of regexes in the 524 /// original set, and *not* the total number of regexes that matched. 525 /// 526 /// ``` 527 /// use regex::bytes::RegexSet; 528 /// 529 /// let set = RegexSet::new([ 530 /// r"[a-z]+@[a-z]+\.(com|org|net)", 531 /// r"[a-z]+\.(com|org|net)", 532 /// ]).unwrap(); 533 /// let matches = set.matches(b"example.com"); 534 /// // Total number of patterns that matched. 535 /// assert_eq!(1, matches.iter().count()); 536 /// // Total number of patterns in the set. 537 /// assert_eq!(2, matches.len()); 538 /// ``` 539 #[inline] len(&self) -> usize540 pub fn len(&self) -> usize { 541 self.0.capacity() 542 } 543 544 /// Returns an iterator over the indices of the regexes that matched. 545 /// 546 /// This will always produces matches in ascending order, where the index 547 /// yielded corresponds to the index of the regex that matched with respect 548 /// to its position when initially building the set. 549 /// 550 /// # Example 551 /// 552 /// ``` 553 /// use regex::bytes::RegexSet; 554 /// 555 /// let set = RegexSet::new([ 556 /// r"[0-9]", 557 /// r"[a-z]", 558 /// r"[A-Z]", 559 /// r"\p{Greek}", 560 /// ]).unwrap(); 561 /// let hay = "βa1".as_bytes(); 562 /// let matches: Vec<_> = set.matches(hay).iter().collect(); 563 /// assert_eq!(matches, vec![0, 1, 3]); 564 /// ``` 565 /// 566 /// Note that `SetMatches` also implemnets the `IntoIterator` trait, so 567 /// this method is not always needed. For example: 568 /// 569 /// ``` 570 /// use regex::bytes::RegexSet; 571 /// 572 /// let set = RegexSet::new([ 573 /// r"[0-9]", 574 /// r"[a-z]", 575 /// r"[A-Z]", 576 /// r"\p{Greek}", 577 /// ]).unwrap(); 578 /// let hay = "βa1".as_bytes(); 579 /// let mut matches = vec![]; 580 /// for index in set.matches(hay) { 581 /// matches.push(index); 582 /// } 583 /// assert_eq!(matches, vec![0, 1, 3]); 584 /// ``` 585 #[inline] iter(&self) -> SetMatchesIter<'_>586 pub fn iter(&self) -> SetMatchesIter<'_> { 587 SetMatchesIter(self.0.iter()) 588 } 589 } 590 591 impl IntoIterator for SetMatches { 592 type IntoIter = SetMatchesIntoIter; 593 type Item = usize; 594 into_iter(self) -> Self::IntoIter595 fn into_iter(self) -> Self::IntoIter { 596 let it = 0..self.0.capacity(); 597 SetMatchesIntoIter { patset: self.0, it } 598 } 599 } 600 601 impl<'a> IntoIterator for &'a SetMatches { 602 type IntoIter = SetMatchesIter<'a>; 603 type Item = usize; 604 into_iter(self) -> Self::IntoIter605 fn into_iter(self) -> Self::IntoIter { 606 self.iter() 607 } 608 } 609 610 /// An owned iterator over the set of matches from a regex set. 611 /// 612 /// This will always produces matches in ascending order of index, where the 613 /// index corresponds to the index of the regex that matched with respect to 614 /// its position when initially building the set. 615 /// 616 /// This iterator is created by calling `SetMatches::into_iter` via the 617 /// `IntoIterator` trait. This is automatically done in `for` loops. 618 /// 619 /// # Example 620 /// 621 /// ``` 622 /// use regex::bytes::RegexSet; 623 /// 624 /// let set = RegexSet::new([ 625 /// r"[0-9]", 626 /// r"[a-z]", 627 /// r"[A-Z]", 628 /// r"\p{Greek}", 629 /// ]).unwrap(); 630 /// let hay = "βa1".as_bytes(); 631 /// let mut matches = vec![]; 632 /// for index in set.matches(hay) { 633 /// matches.push(index); 634 /// } 635 /// assert_eq!(matches, vec![0, 1, 3]); 636 /// ``` 637 #[derive(Debug)] 638 pub struct SetMatchesIntoIter { 639 patset: PatternSet, 640 it: core::ops::Range<usize>, 641 } 642 643 impl Iterator for SetMatchesIntoIter { 644 type Item = usize; 645 next(&mut self) -> Option<usize>646 fn next(&mut self) -> Option<usize> { 647 loop { 648 let id = self.it.next()?; 649 if self.patset.contains(PatternID::new_unchecked(id)) { 650 return Some(id); 651 } 652 } 653 } 654 size_hint(&self) -> (usize, Option<usize>)655 fn size_hint(&self) -> (usize, Option<usize>) { 656 self.it.size_hint() 657 } 658 } 659 660 impl DoubleEndedIterator for SetMatchesIntoIter { next_back(&mut self) -> Option<usize>661 fn next_back(&mut self) -> Option<usize> { 662 loop { 663 let id = self.it.next_back()?; 664 if self.patset.contains(PatternID::new_unchecked(id)) { 665 return Some(id); 666 } 667 } 668 } 669 } 670 671 impl core::iter::FusedIterator for SetMatchesIntoIter {} 672 673 /// A borrowed iterator over the set of matches from a regex set. 674 /// 675 /// The lifetime `'a` refers to the lifetime of the [`SetMatches`] value that 676 /// created this iterator. 677 /// 678 /// This will always produces matches in ascending order, where the index 679 /// corresponds to the index of the regex that matched with respect to its 680 /// position when initially building the set. 681 /// 682 /// This iterator is created by the [`SetMatches::iter`] method. 683 #[derive(Clone, Debug)] 684 pub struct SetMatchesIter<'a>(PatternSetIter<'a>); 685 686 impl<'a> Iterator for SetMatchesIter<'a> { 687 type Item = usize; 688 next(&mut self) -> Option<usize>689 fn next(&mut self) -> Option<usize> { 690 self.0.next().map(|pid| pid.as_usize()) 691 } 692 size_hint(&self) -> (usize, Option<usize>)693 fn size_hint(&self) -> (usize, Option<usize>) { 694 self.0.size_hint() 695 } 696 } 697 698 impl<'a> DoubleEndedIterator for SetMatchesIter<'a> { next_back(&mut self) -> Option<usize>699 fn next_back(&mut self) -> Option<usize> { 700 self.0.next_back().map(|pid| pid.as_usize()) 701 } 702 } 703 704 impl<'a> core::iter::FusedIterator for SetMatchesIter<'a> {} 705 706 impl core::fmt::Debug for RegexSet { fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result707 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { 708 write!(f, "RegexSet({:?})", self.patterns()) 709 } 710 } 711