xref: /aosp_15_r20/external/cronet/third_party/rust/chromium_crates_io/vendor/regex-1.10.4/src/regex/string.rs (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 use alloc::{borrow::Cow, string::String, sync::Arc};
2 
3 use regex_automata::{meta, util::captures, Input, PatternID};
4 
5 use crate::{error::Error, RegexBuilder};
6 
7 /// A compiled regular expression for searching Unicode haystacks.
8 ///
9 /// A `Regex` can be used to search haystacks, split haystacks into substrings
10 /// or replace substrings in a haystack with a different substring. All
11 /// searching is done with an implicit `(?s:.)*?` at the beginning and end of
12 /// a pattern. To force an expression to match the whole string (or a prefix
13 /// or a suffix), you must use an anchor like `^` or `$` (or `\A` and `\z`).
14 ///
15 /// While this crate will handle Unicode strings (whether in the regular
16 /// expression or in the haystack), all positions returned are **byte
17 /// offsets**. Every byte offset is guaranteed to be at a Unicode code point
18 /// boundary. That is, all offsets returned by the `Regex` API are guaranteed
19 /// to be ranges that can slice a `&str` without panicking. If you want to
20 /// relax this requirement, then you must search `&[u8]` haystacks with a
21 /// [`bytes::Regex`](crate::bytes::Regex).
22 ///
23 /// The only methods that allocate new strings are the string replacement
24 /// methods. All other methods (searching and splitting) return borrowed
25 /// references into the haystack given.
26 ///
27 /// # Example
28 ///
29 /// Find the offsets of a US phone number:
30 ///
31 /// ```
32 /// use regex::Regex;
33 ///
34 /// let re = Regex::new("[0-9]{3}-[0-9]{3}-[0-9]{4}").unwrap();
35 /// let m = re.find("phone: 111-222-3333").unwrap();
36 /// assert_eq!(7..19, m.range());
37 /// ```
38 ///
39 /// # Example: extracting capture groups
40 ///
41 /// A common way to use regexes is with capture groups. That is, instead of
42 /// just looking for matches of an entire regex, parentheses are used to create
43 /// groups that represent part of the match.
44 ///
45 /// For example, consider a haystack with multiple lines, and each line has
46 /// three whitespace delimited fields where the second field is expected to be
47 /// a number and the third field a boolean. To make this convenient, we use
48 /// the [`Captures::extract`] API to put the strings that match each group
49 /// into a fixed size array:
50 ///
51 /// ```
52 /// use regex::Regex;
53 ///
54 /// let hay = "
55 /// rabbit         54 true
56 /// groundhog 2 true
57 /// does not match
58 /// fox   109    false
59 /// ";
60 /// let re = Regex::new(r"(?m)^\s*(\S+)\s+([0-9]+)\s+(true|false)\s*$").unwrap();
61 /// let mut fields: Vec<(&str, i64, bool)> = vec![];
62 /// for (_, [f1, f2, f3]) in re.captures_iter(hay).map(|caps| caps.extract()) {
63 ///     fields.push((f1, f2.parse()?, f3.parse()?));
64 /// }
65 /// assert_eq!(fields, vec![
66 ///     ("rabbit", 54, true),
67 ///     ("groundhog", 2, true),
68 ///     ("fox", 109, false),
69 /// ]);
70 ///
71 /// # Ok::<(), Box<dyn std::error::Error>>(())
72 /// ```
73 ///
74 /// # Example: searching with the `Pattern` trait
75 ///
76 /// **Note**: This section requires that this crate is compiled with the
77 /// `pattern` Cargo feature enabled, which **requires nightly Rust**.
78 ///
79 /// Since `Regex` implements `Pattern` from the standard library, one can
80 /// use regexes with methods defined on `&str`. For example, `is_match`,
81 /// `find`, `find_iter` and `split` can, in some cases, be replaced with
82 /// `str::contains`, `str::find`, `str::match_indices` and `str::split`.
83 ///
84 /// Here are some examples:
85 ///
86 /// ```ignore
87 /// use regex::Regex;
88 ///
89 /// let re = Regex::new(r"\d+").unwrap();
90 /// let hay = "a111b222c";
91 ///
92 /// assert!(hay.contains(&re));
93 /// assert_eq!(hay.find(&re), Some(1));
94 /// assert_eq!(hay.match_indices(&re).collect::<Vec<_>>(), vec![
95 ///     (1, "111"),
96 ///     (5, "222"),
97 /// ]);
98 /// assert_eq!(hay.split(&re).collect::<Vec<_>>(), vec!["a", "b", "c"]);
99 /// ```
100 #[derive(Clone)]
101 pub struct Regex {
102     pub(crate) meta: meta::Regex,
103     pub(crate) pattern: Arc<str>,
104 }
105 
106 impl core::fmt::Display for Regex {
107     /// Shows the original regular expression.
fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result108     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
109         write!(f, "{}", self.as_str())
110     }
111 }
112 
113 impl core::fmt::Debug for Regex {
114     /// Shows the original regular expression.
fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result115     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
116         f.debug_tuple("Regex").field(&self.as_str()).finish()
117     }
118 }
119 
120 impl core::str::FromStr for Regex {
121     type Err = Error;
122 
123     /// Attempts to parse a string into a regular expression
from_str(s: &str) -> Result<Regex, Error>124     fn from_str(s: &str) -> Result<Regex, Error> {
125         Regex::new(s)
126     }
127 }
128 
129 impl TryFrom<&str> for Regex {
130     type Error = Error;
131 
132     /// Attempts to parse a string into a regular expression
try_from(s: &str) -> Result<Regex, Error>133     fn try_from(s: &str) -> Result<Regex, Error> {
134         Regex::new(s)
135     }
136 }
137 
138 impl TryFrom<String> for Regex {
139     type Error = Error;
140 
141     /// Attempts to parse a string into a regular expression
try_from(s: String) -> Result<Regex, Error>142     fn try_from(s: String) -> Result<Regex, Error> {
143         Regex::new(&s)
144     }
145 }
146 
147 /// Core regular expression methods.
148 impl Regex {
149     /// Compiles a regular expression. Once compiled, it can be used repeatedly
150     /// to search, split or replace substrings in a haystack.
151     ///
152     /// Note that regex compilation tends to be a somewhat expensive process,
153     /// and unlike higher level environments, compilation is not automatically
154     /// cached for you. One should endeavor to compile a regex once and then
155     /// reuse it. For example, it's a bad idea to compile the same regex
156     /// repeatedly in a loop.
157     ///
158     /// # Errors
159     ///
160     /// If an invalid pattern is given, then an error is returned.
161     /// An error is also returned if the pattern is valid, but would
162     /// produce a regex that is bigger than the configured size limit via
163     /// [`RegexBuilder::size_limit`]. (A reasonable size limit is enabled by
164     /// default.)
165     ///
166     /// # Example
167     ///
168     /// ```
169     /// use regex::Regex;
170     ///
171     /// // An invalid pattern because of an unclosed parenthesis
172     /// assert!(Regex::new(r"foo(bar").is_err());
173     /// // An invalid pattern because the regex would be too big
174     /// // because Unicode tends to inflate things.
175     /// assert!(Regex::new(r"\w{1000}").is_err());
176     /// // Disabling Unicode can make the regex much smaller,
177     /// // potentially by up to or more than an order of magnitude.
178     /// assert!(Regex::new(r"(?-u:\w){1000}").is_ok());
179     /// ```
new(re: &str) -> Result<Regex, Error>180     pub fn new(re: &str) -> Result<Regex, Error> {
181         RegexBuilder::new(re).build()
182     }
183 
184     /// Returns true if and only if there is a match for the regex anywhere
185     /// in the haystack given.
186     ///
187     /// It is recommended to use this method if all you need to do is test
188     /// whether a match exists, since the underlying matching engine may be
189     /// able to do less work.
190     ///
191     /// # Example
192     ///
193     /// Test if some haystack contains at least one word with exactly 13
194     /// Unicode word characters:
195     ///
196     /// ```
197     /// use regex::Regex;
198     ///
199     /// let re = Regex::new(r"\b\w{13}\b").unwrap();
200     /// let hay = "I categorically deny having triskaidekaphobia.";
201     /// assert!(re.is_match(hay));
202     /// ```
203     #[inline]
is_match(&self, haystack: &str) -> bool204     pub fn is_match(&self, haystack: &str) -> bool {
205         self.is_match_at(haystack, 0)
206     }
207 
208     /// This routine searches for the first match of this regex in the
209     /// haystack given, and if found, returns a [`Match`]. The `Match`
210     /// provides access to both the byte offsets of the match and the actual
211     /// substring that matched.
212     ///
213     /// Note that this should only be used if you want to find the entire
214     /// match. If instead you just want to test the existence of a match,
215     /// it's potentially faster to use `Regex::is_match(hay)` instead of
216     /// `Regex::find(hay).is_some()`.
217     ///
218     /// # Example
219     ///
220     /// Find the first word with exactly 13 Unicode word characters:
221     ///
222     /// ```
223     /// use regex::Regex;
224     ///
225     /// let re = Regex::new(r"\b\w{13}\b").unwrap();
226     /// let hay = "I categorically deny having triskaidekaphobia.";
227     /// let mat = re.find(hay).unwrap();
228     /// assert_eq!(2..15, mat.range());
229     /// assert_eq!("categorically", mat.as_str());
230     /// ```
231     #[inline]
find<'h>(&self, haystack: &'h str) -> Option<Match<'h>>232     pub fn find<'h>(&self, haystack: &'h str) -> Option<Match<'h>> {
233         self.find_at(haystack, 0)
234     }
235 
236     /// Returns an iterator that yields successive non-overlapping matches in
237     /// the given haystack. The iterator yields values of type [`Match`].
238     ///
239     /// # Time complexity
240     ///
241     /// Note that since `find_iter` runs potentially many searches on the
242     /// haystack and since each search has worst case `O(m * n)` time
243     /// complexity, the overall worst case time complexity for iteration is
244     /// `O(m * n^2)`.
245     ///
246     /// # Example
247     ///
248     /// Find every word with exactly 13 Unicode word characters:
249     ///
250     /// ```
251     /// use regex::Regex;
252     ///
253     /// let re = Regex::new(r"\b\w{13}\b").unwrap();
254     /// let hay = "Retroactively relinquishing remunerations is reprehensible.";
255     /// let matches: Vec<_> = re.find_iter(hay).map(|m| m.as_str()).collect();
256     /// assert_eq!(matches, vec![
257     ///     "Retroactively",
258     ///     "relinquishing",
259     ///     "remunerations",
260     ///     "reprehensible",
261     /// ]);
262     /// ```
263     #[inline]
find_iter<'r, 'h>(&'r self, haystack: &'h str) -> Matches<'r, 'h>264     pub fn find_iter<'r, 'h>(&'r self, haystack: &'h str) -> Matches<'r, 'h> {
265         Matches { haystack, it: self.meta.find_iter(haystack) }
266     }
267 
268     /// This routine searches for the first match of this regex in the haystack
269     /// given, and if found, returns not only the overall match but also the
270     /// matches of each capture group in the regex. If no match is found, then
271     /// `None` is returned.
272     ///
273     /// Capture group `0` always corresponds to an implicit unnamed group that
274     /// includes the entire match. If a match is found, this group is always
275     /// present. Subsequent groups may be named and are numbered, starting
276     /// at 1, by the order in which the opening parenthesis appears in the
277     /// pattern. For example, in the pattern `(?<a>.(?<b>.))(?<c>.)`, `a`,
278     /// `b` and `c` correspond to capture group indices `1`, `2` and `3`,
279     /// respectively.
280     ///
281     /// You should only use `captures` if you need access to the capture group
282     /// matches. Otherwise, [`Regex::find`] is generally faster for discovering
283     /// just the overall match.
284     ///
285     /// # Example
286     ///
287     /// Say you have some haystack with movie names and their release years,
288     /// like "'Citizen Kane' (1941)". It'd be nice if we could search for
289     /// substrings looking like that, while also extracting the movie name and
290     /// its release year separately. The example below shows how to do that.
291     ///
292     /// ```
293     /// use regex::Regex;
294     ///
295     /// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap();
296     /// let hay = "Not my favorite movie: 'Citizen Kane' (1941).";
297     /// let caps = re.captures(hay).unwrap();
298     /// assert_eq!(caps.get(0).unwrap().as_str(), "'Citizen Kane' (1941)");
299     /// assert_eq!(caps.get(1).unwrap().as_str(), "Citizen Kane");
300     /// assert_eq!(caps.get(2).unwrap().as_str(), "1941");
301     /// // You can also access the groups by index using the Index notation.
302     /// // Note that this will panic on an invalid index. In this case, these
303     /// // accesses are always correct because the overall regex will only
304     /// // match when these capture groups match.
305     /// assert_eq!(&caps[0], "'Citizen Kane' (1941)");
306     /// assert_eq!(&caps[1], "Citizen Kane");
307     /// assert_eq!(&caps[2], "1941");
308     /// ```
309     ///
310     /// Note that the full match is at capture group `0`. Each subsequent
311     /// capture group is indexed by the order of its opening `(`.
312     ///
313     /// We can make this example a bit clearer by using *named* capture groups:
314     ///
315     /// ```
316     /// use regex::Regex;
317     ///
318     /// let re = Regex::new(r"'(?<title>[^']+)'\s+\((?<year>\d{4})\)").unwrap();
319     /// let hay = "Not my favorite movie: 'Citizen Kane' (1941).";
320     /// let caps = re.captures(hay).unwrap();
321     /// assert_eq!(caps.get(0).unwrap().as_str(), "'Citizen Kane' (1941)");
322     /// assert_eq!(caps.name("title").unwrap().as_str(), "Citizen Kane");
323     /// assert_eq!(caps.name("year").unwrap().as_str(), "1941");
324     /// // You can also access the groups by name using the Index notation.
325     /// // Note that this will panic on an invalid group name. In this case,
326     /// // these accesses are always correct because the overall regex will
327     /// // only match when these capture groups match.
328     /// assert_eq!(&caps[0], "'Citizen Kane' (1941)");
329     /// assert_eq!(&caps["title"], "Citizen Kane");
330     /// assert_eq!(&caps["year"], "1941");
331     /// ```
332     ///
333     /// Here we name the capture groups, which we can access with the `name`
334     /// method or the `Index` notation with a `&str`. Note that the named
335     /// capture groups are still accessible with `get` or the `Index` notation
336     /// with a `usize`.
337     ///
338     /// The `0`th capture group is always unnamed, so it must always be
339     /// accessed with `get(0)` or `[0]`.
340     ///
341     /// Finally, one other way to get the matched substrings is with the
342     /// [`Captures::extract`] API:
343     ///
344     /// ```
345     /// use regex::Regex;
346     ///
347     /// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap();
348     /// let hay = "Not my favorite movie: 'Citizen Kane' (1941).";
349     /// let (full, [title, year]) = re.captures(hay).unwrap().extract();
350     /// assert_eq!(full, "'Citizen Kane' (1941)");
351     /// assert_eq!(title, "Citizen Kane");
352     /// assert_eq!(year, "1941");
353     /// ```
354     #[inline]
captures<'h>(&self, haystack: &'h str) -> Option<Captures<'h>>355     pub fn captures<'h>(&self, haystack: &'h str) -> Option<Captures<'h>> {
356         self.captures_at(haystack, 0)
357     }
358 
359     /// Returns an iterator that yields successive non-overlapping matches in
360     /// the given haystack. The iterator yields values of type [`Captures`].
361     ///
362     /// This is the same as [`Regex::find_iter`], but instead of only providing
363     /// access to the overall match, each value yielded includes access to the
364     /// matches of all capture groups in the regex. Reporting this extra match
365     /// data is potentially costly, so callers should only use `captures_iter`
366     /// over `find_iter` when they actually need access to the capture group
367     /// matches.
368     ///
369     /// # Time complexity
370     ///
371     /// Note that since `captures_iter` runs potentially many searches on the
372     /// haystack and since each search has worst case `O(m * n)` time
373     /// complexity, the overall worst case time complexity for iteration is
374     /// `O(m * n^2)`.
375     ///
376     /// # Example
377     ///
378     /// We can use this to find all movie titles and their release years in
379     /// some haystack, where the movie is formatted like "'Title' (xxxx)":
380     ///
381     /// ```
382     /// use regex::Regex;
383     ///
384     /// let re = Regex::new(r"'([^']+)'\s+\(([0-9]{4})\)").unwrap();
385     /// let hay = "'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931).";
386     /// let mut movies = vec![];
387     /// for (_, [title, year]) in re.captures_iter(hay).map(|c| c.extract()) {
388     ///     movies.push((title, year.parse::<i64>()?));
389     /// }
390     /// assert_eq!(movies, vec![
391     ///     ("Citizen Kane", 1941),
392     ///     ("The Wizard of Oz", 1939),
393     ///     ("M", 1931),
394     /// ]);
395     /// # Ok::<(), Box<dyn std::error::Error>>(())
396     /// ```
397     ///
398     /// Or with named groups:
399     ///
400     /// ```
401     /// use regex::Regex;
402     ///
403     /// let re = Regex::new(r"'(?<title>[^']+)'\s+\((?<year>[0-9]{4})\)").unwrap();
404     /// let hay = "'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931).";
405     /// let mut it = re.captures_iter(hay);
406     ///
407     /// let caps = it.next().unwrap();
408     /// assert_eq!(&caps["title"], "Citizen Kane");
409     /// assert_eq!(&caps["year"], "1941");
410     ///
411     /// let caps = it.next().unwrap();
412     /// assert_eq!(&caps["title"], "The Wizard of Oz");
413     /// assert_eq!(&caps["year"], "1939");
414     ///
415     /// let caps = it.next().unwrap();
416     /// assert_eq!(&caps["title"], "M");
417     /// assert_eq!(&caps["year"], "1931");
418     /// ```
419     #[inline]
captures_iter<'r, 'h>( &'r self, haystack: &'h str, ) -> CaptureMatches<'r, 'h>420     pub fn captures_iter<'r, 'h>(
421         &'r self,
422         haystack: &'h str,
423     ) -> CaptureMatches<'r, 'h> {
424         CaptureMatches { haystack, it: self.meta.captures_iter(haystack) }
425     }
426 
427     /// Returns an iterator of substrings of the haystack given, delimited by a
428     /// match of the regex. Namely, each element of the iterator corresponds to
429     /// a part of the haystack that *isn't* matched by the regular expression.
430     ///
431     /// # Time complexity
432     ///
433     /// Since iterators over all matches requires running potentially many
434     /// searches on the haystack, and since each search has worst case
435     /// `O(m * n)` time complexity, the overall worst case time complexity for
436     /// this routine is `O(m * n^2)`.
437     ///
438     /// # Example
439     ///
440     /// To split a string delimited by arbitrary amounts of spaces or tabs:
441     ///
442     /// ```
443     /// use regex::Regex;
444     ///
445     /// let re = Regex::new(r"[ \t]+").unwrap();
446     /// let hay = "a b \t  c\td    e";
447     /// let fields: Vec<&str> = re.split(hay).collect();
448     /// assert_eq!(fields, vec!["a", "b", "c", "d", "e"]);
449     /// ```
450     ///
451     /// # Example: more cases
452     ///
453     /// Basic usage:
454     ///
455     /// ```
456     /// use regex::Regex;
457     ///
458     /// let re = Regex::new(r" ").unwrap();
459     /// let hay = "Mary had a little lamb";
460     /// let got: Vec<&str> = re.split(hay).collect();
461     /// assert_eq!(got, vec!["Mary", "had", "a", "little", "lamb"]);
462     ///
463     /// let re = Regex::new(r"X").unwrap();
464     /// let hay = "";
465     /// let got: Vec<&str> = re.split(hay).collect();
466     /// assert_eq!(got, vec![""]);
467     ///
468     /// let re = Regex::new(r"X").unwrap();
469     /// let hay = "lionXXtigerXleopard";
470     /// let got: Vec<&str> = re.split(hay).collect();
471     /// assert_eq!(got, vec!["lion", "", "tiger", "leopard"]);
472     ///
473     /// let re = Regex::new(r"::").unwrap();
474     /// let hay = "lion::tiger::leopard";
475     /// let got: Vec<&str> = re.split(hay).collect();
476     /// assert_eq!(got, vec!["lion", "tiger", "leopard"]);
477     /// ```
478     ///
479     /// If a haystack contains multiple contiguous matches, you will end up
480     /// with empty spans yielded by the iterator:
481     ///
482     /// ```
483     /// use regex::Regex;
484     ///
485     /// let re = Regex::new(r"X").unwrap();
486     /// let hay = "XXXXaXXbXc";
487     /// let got: Vec<&str> = re.split(hay).collect();
488     /// assert_eq!(got, vec!["", "", "", "", "a", "", "b", "c"]);
489     ///
490     /// let re = Regex::new(r"/").unwrap();
491     /// let hay = "(///)";
492     /// let got: Vec<&str> = re.split(hay).collect();
493     /// assert_eq!(got, vec!["(", "", "", ")"]);
494     /// ```
495     ///
496     /// Separators at the start or end of a haystack are neighbored by empty
497     /// substrings.
498     ///
499     /// ```
500     /// use regex::Regex;
501     ///
502     /// let re = Regex::new(r"0").unwrap();
503     /// let hay = "010";
504     /// let got: Vec<&str> = re.split(hay).collect();
505     /// assert_eq!(got, vec!["", "1", ""]);
506     /// ```
507     ///
508     /// When the empty string is used as a regex, it splits at every valid
509     /// UTF-8 boundary by default (which includes the beginning and end of the
510     /// haystack):
511     ///
512     /// ```
513     /// use regex::Regex;
514     ///
515     /// let re = Regex::new(r"").unwrap();
516     /// let hay = "rust";
517     /// let got: Vec<&str> = re.split(hay).collect();
518     /// assert_eq!(got, vec!["", "r", "u", "s", "t", ""]);
519     ///
520     /// // Splitting by an empty string is UTF-8 aware by default!
521     /// let re = Regex::new(r"").unwrap();
522     /// let hay = "☃";
523     /// let got: Vec<&str> = re.split(hay).collect();
524     /// assert_eq!(got, vec!["", "☃", ""]);
525     /// ```
526     ///
527     /// Contiguous separators (which commonly show up with whitespace) can lead to
528     /// possibly surprising behavior. For example, this code is correct:
529     ///
530     /// ```
531     /// use regex::Regex;
532     ///
533     /// let re = Regex::new(r" ").unwrap();
534     /// let hay = "    a  b c";
535     /// let got: Vec<&str> = re.split(hay).collect();
536     /// assert_eq!(got, vec!["", "", "", "", "a", "", "b", "c"]);
537     /// ```
538     ///
539     /// It does *not* give you `["a", "b", "c"]`. For that behavior, you'd want
540     /// to match contiguous space characters:
541     ///
542     /// ```
543     /// use regex::Regex;
544     ///
545     /// let re = Regex::new(r" +").unwrap();
546     /// let hay = "    a  b c";
547     /// let got: Vec<&str> = re.split(hay).collect();
548     /// // N.B. This does still include a leading empty span because ' +'
549     /// // matches at the beginning of the haystack.
550     /// assert_eq!(got, vec!["", "a", "b", "c"]);
551     /// ```
552     #[inline]
split<'r, 'h>(&'r self, haystack: &'h str) -> Split<'r, 'h>553     pub fn split<'r, 'h>(&'r self, haystack: &'h str) -> Split<'r, 'h> {
554         Split { haystack, it: self.meta.split(haystack) }
555     }
556 
557     /// Returns an iterator of at most `limit` substrings of the haystack
558     /// given, delimited by a match of the regex. (A `limit` of `0` will return
559     /// no substrings.) Namely, each element of the iterator corresponds to a
560     /// part of the haystack that *isn't* matched by the regular expression.
561     /// The remainder of the haystack that is not split will be the last
562     /// element in the iterator.
563     ///
564     /// # Time complexity
565     ///
566     /// Since iterators over all matches requires running potentially many
567     /// searches on the haystack, and since each search has worst case
568     /// `O(m * n)` time complexity, the overall worst case time complexity for
569     /// this routine is `O(m * n^2)`.
570     ///
571     /// Although note that the worst case time here has an upper bound given
572     /// by the `limit` parameter.
573     ///
574     /// # Example
575     ///
576     /// Get the first two words in some haystack:
577     ///
578     /// ```
579     /// use regex::Regex;
580     ///
581     /// let re = Regex::new(r"\W+").unwrap();
582     /// let hay = "Hey! How are you?";
583     /// let fields: Vec<&str> = re.splitn(hay, 3).collect();
584     /// assert_eq!(fields, vec!["Hey", "How", "are you?"]);
585     /// ```
586     ///
587     /// # Examples: more cases
588     ///
589     /// ```
590     /// use regex::Regex;
591     ///
592     /// let re = Regex::new(r" ").unwrap();
593     /// let hay = "Mary had a little lamb";
594     /// let got: Vec<&str> = re.splitn(hay, 3).collect();
595     /// assert_eq!(got, vec!["Mary", "had", "a little lamb"]);
596     ///
597     /// let re = Regex::new(r"X").unwrap();
598     /// let hay = "";
599     /// let got: Vec<&str> = re.splitn(hay, 3).collect();
600     /// assert_eq!(got, vec![""]);
601     ///
602     /// let re = Regex::new(r"X").unwrap();
603     /// let hay = "lionXXtigerXleopard";
604     /// let got: Vec<&str> = re.splitn(hay, 3).collect();
605     /// assert_eq!(got, vec!["lion", "", "tigerXleopard"]);
606     ///
607     /// let re = Regex::new(r"::").unwrap();
608     /// let hay = "lion::tiger::leopard";
609     /// let got: Vec<&str> = re.splitn(hay, 2).collect();
610     /// assert_eq!(got, vec!["lion", "tiger::leopard"]);
611     ///
612     /// let re = Regex::new(r"X").unwrap();
613     /// let hay = "abcXdef";
614     /// let got: Vec<&str> = re.splitn(hay, 1).collect();
615     /// assert_eq!(got, vec!["abcXdef"]);
616     ///
617     /// let re = Regex::new(r"X").unwrap();
618     /// let hay = "abcdef";
619     /// let got: Vec<&str> = re.splitn(hay, 2).collect();
620     /// assert_eq!(got, vec!["abcdef"]);
621     ///
622     /// let re = Regex::new(r"X").unwrap();
623     /// let hay = "abcXdef";
624     /// let got: Vec<&str> = re.splitn(hay, 0).collect();
625     /// assert!(got.is_empty());
626     /// ```
627     #[inline]
splitn<'r, 'h>( &'r self, haystack: &'h str, limit: usize, ) -> SplitN<'r, 'h>628     pub fn splitn<'r, 'h>(
629         &'r self,
630         haystack: &'h str,
631         limit: usize,
632     ) -> SplitN<'r, 'h> {
633         SplitN { haystack, it: self.meta.splitn(haystack, limit) }
634     }
635 
636     /// Replaces the leftmost-first match in the given haystack with the
637     /// replacement provided. The replacement can be a regular string (where
638     /// `$N` and `$name` are expanded to match capture groups) or a function
639     /// that takes a [`Captures`] and returns the replaced string.
640     ///
641     /// If no match is found, then the haystack is returned unchanged. In that
642     /// case, this implementation will likely return a `Cow::Borrowed` value
643     /// such that no allocation is performed.
644     ///
645     /// When a `Cow::Borrowed` is returned, the value returned is guaranteed
646     /// to be equivalent to the `haystack` given.
647     ///
648     /// # Replacement string syntax
649     ///
650     /// All instances of `$ref` in the replacement string are replaced with
651     /// the substring corresponding to the capture group identified by `ref`.
652     ///
653     /// `ref` may be an integer corresponding to the index of the capture group
654     /// (counted by order of opening parenthesis where `0` is the entire match)
655     /// or it can be a name (consisting of letters, digits or underscores)
656     /// corresponding to a named capture group.
657     ///
658     /// If `ref` isn't a valid capture group (whether the name doesn't exist or
659     /// isn't a valid index), then it is replaced with the empty string.
660     ///
661     /// The longest possible name is used. For example, `$1a` looks up the
662     /// capture group named `1a` and not the capture group at index `1`. To
663     /// exert more precise control over the name, use braces, e.g., `${1}a`.
664     ///
665     /// To write a literal `$` use `$$`.
666     ///
667     /// # Example
668     ///
669     /// Note that this function is polymorphic with respect to the replacement.
670     /// In typical usage, this can just be a normal string:
671     ///
672     /// ```
673     /// use regex::Regex;
674     ///
675     /// let re = Regex::new(r"[^01]+").unwrap();
676     /// assert_eq!(re.replace("1078910", ""), "1010");
677     /// ```
678     ///
679     /// But anything satisfying the [`Replacer`] trait will work. For example,
680     /// a closure of type `|&Captures| -> String` provides direct access to the
681     /// captures corresponding to a match. This allows one to access capturing
682     /// group matches easily:
683     ///
684     /// ```
685     /// use regex::{Captures, Regex};
686     ///
687     /// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap();
688     /// let result = re.replace("Springsteen, Bruce", |caps: &Captures| {
689     ///     format!("{} {}", &caps[2], &caps[1])
690     /// });
691     /// assert_eq!(result, "Bruce Springsteen");
692     /// ```
693     ///
694     /// But this is a bit cumbersome to use all the time. Instead, a simple
695     /// syntax is supported (as described above) that expands `$name` into the
696     /// corresponding capture group. Here's the last example, but using this
697     /// expansion technique with named capture groups:
698     ///
699     /// ```
700     /// use regex::Regex;
701     ///
702     /// let re = Regex::new(r"(?<last>[^,\s]+),\s+(?<first>\S+)").unwrap();
703     /// let result = re.replace("Springsteen, Bruce", "$first $last");
704     /// assert_eq!(result, "Bruce Springsteen");
705     /// ```
706     ///
707     /// Note that using `$2` instead of `$first` or `$1` instead of `$last`
708     /// would produce the same result. To write a literal `$` use `$$`.
709     ///
710     /// Sometimes the replacement string requires use of curly braces to
711     /// delineate a capture group replacement when it is adjacent to some other
712     /// literal text. For example, if we wanted to join two words together with
713     /// an underscore:
714     ///
715     /// ```
716     /// use regex::Regex;
717     ///
718     /// let re = Regex::new(r"(?<first>\w+)\s+(?<second>\w+)").unwrap();
719     /// let result = re.replace("deep fried", "${first}_$second");
720     /// assert_eq!(result, "deep_fried");
721     /// ```
722     ///
723     /// Without the curly braces, the capture group name `first_` would be
724     /// used, and since it doesn't exist, it would be replaced with the empty
725     /// string.
726     ///
727     /// Finally, sometimes you just want to replace a literal string with no
728     /// regard for capturing group expansion. This can be done by wrapping a
729     /// string with [`NoExpand`]:
730     ///
731     /// ```
732     /// use regex::{NoExpand, Regex};
733     ///
734     /// let re = Regex::new(r"(?<last>[^,\s]+),\s+(\S+)").unwrap();
735     /// let result = re.replace("Springsteen, Bruce", NoExpand("$2 $last"));
736     /// assert_eq!(result, "$2 $last");
737     /// ```
738     ///
739     /// Using `NoExpand` may also be faster, since the replacement string won't
740     /// need to be parsed for the `$` syntax.
741     #[inline]
replace<'h, R: Replacer>( &self, haystack: &'h str, rep: R, ) -> Cow<'h, str>742     pub fn replace<'h, R: Replacer>(
743         &self,
744         haystack: &'h str,
745         rep: R,
746     ) -> Cow<'h, str> {
747         self.replacen(haystack, 1, rep)
748     }
749 
750     /// Replaces all non-overlapping matches in the haystack with the
751     /// replacement provided. This is the same as calling `replacen` with
752     /// `limit` set to `0`.
753     ///
754     /// If no match is found, then the haystack is returned unchanged. In that
755     /// case, this implementation will likely return a `Cow::Borrowed` value
756     /// such that no allocation is performed.
757     ///
758     /// When a `Cow::Borrowed` is returned, the value returned is guaranteed
759     /// to be equivalent to the `haystack` given.
760     ///
761     /// The documentation for [`Regex::replace`] goes into more detail about
762     /// what kinds of replacement strings are supported.
763     ///
764     /// # Time complexity
765     ///
    /// Since iterating over all matches requires running potentially many
767     /// searches on the haystack, and since each search has worst case
768     /// `O(m * n)` time complexity, the overall worst case time complexity for
769     /// this routine is `O(m * n^2)`.
770     ///
771     /// # Fallibility
772     ///
773     /// If you need to write a replacement routine where any individual
774     /// replacement might "fail," doing so with this API isn't really feasible
775     /// because there's no way to stop the search process if a replacement
776     /// fails. Instead, if you need this functionality, you should consider
777     /// implementing your own replacement routine:
778     ///
779     /// ```
780     /// use regex::{Captures, Regex};
781     ///
782     /// fn replace_all<E>(
783     ///     re: &Regex,
784     ///     haystack: &str,
785     ///     replacement: impl Fn(&Captures) -> Result<String, E>,
786     /// ) -> Result<String, E> {
787     ///     let mut new = String::with_capacity(haystack.len());
788     ///     let mut last_match = 0;
789     ///     for caps in re.captures_iter(haystack) {
790     ///         let m = caps.get(0).unwrap();
791     ///         new.push_str(&haystack[last_match..m.start()]);
792     ///         new.push_str(&replacement(&caps)?);
793     ///         last_match = m.end();
794     ///     }
795     ///     new.push_str(&haystack[last_match..]);
796     ///     Ok(new)
797     /// }
798     ///
799     /// // Let's replace each word with the number of bytes in that word.
800     /// // But if we see a word that is "too long," we'll give up.
801     /// let re = Regex::new(r"\w+").unwrap();
802     /// let replacement = |caps: &Captures| -> Result<String, &'static str> {
803     ///     if caps[0].len() >= 5 {
804     ///         return Err("word too long");
805     ///     }
806     ///     Ok(caps[0].len().to_string())
807     /// };
808     /// assert_eq!(
809     ///     Ok("2 3 3 3?".to_string()),
810     ///     replace_all(&re, "hi how are you?", &replacement),
811     /// );
812     /// assert!(replace_all(&re, "hi there", &replacement).is_err());
813     /// ```
814     ///
815     /// # Example
816     ///
817     /// This example shows how to flip the order of whitespace (excluding line
818     /// terminators) delimited fields, and normalizes the whitespace that
819     /// delimits the fields:
820     ///
821     /// ```
822     /// use regex::Regex;
823     ///
824     /// let re = Regex::new(r"(?m)^(\S+)[\s--\r\n]+(\S+)$").unwrap();
825     /// let hay = "
826     /// Greetings  1973
827     /// Wild\t1973
828     /// BornToRun\t\t\t\t1975
829     /// Darkness                    1978
830     /// TheRiver 1980
831     /// ";
832     /// let new = re.replace_all(hay, "$2 $1");
833     /// assert_eq!(new, "
834     /// 1973 Greetings
835     /// 1973 Wild
836     /// 1975 BornToRun
837     /// 1978 Darkness
838     /// 1980 TheRiver
839     /// ");
840     /// ```
841     #[inline]
replace_all<'h, R: Replacer>( &self, haystack: &'h str, rep: R, ) -> Cow<'h, str>842     pub fn replace_all<'h, R: Replacer>(
843         &self,
844         haystack: &'h str,
845         rep: R,
846     ) -> Cow<'h, str> {
847         self.replacen(haystack, 0, rep)
848     }
849 
850     /// Replaces at most `limit` non-overlapping matches in the haystack with
851     /// the replacement provided. If `limit` is `0`, then all non-overlapping
852     /// matches are replaced. That is, `Regex::replace_all(hay, rep)` is
853     /// equivalent to `Regex::replacen(hay, 0, rep)`.
854     ///
855     /// If no match is found, then the haystack is returned unchanged. In that
856     /// case, this implementation will likely return a `Cow::Borrowed` value
857     /// such that no allocation is performed.
858     ///
859     /// When a `Cow::Borrowed` is returned, the value returned is guaranteed
860     /// to be equivalent to the `haystack` given.
861     ///
862     /// The documentation for [`Regex::replace`] goes into more detail about
863     /// what kinds of replacement strings are supported.
864     ///
865     /// # Time complexity
866     ///
    /// Since iterating over all matches requires running potentially many
868     /// searches on the haystack, and since each search has worst case
869     /// `O(m * n)` time complexity, the overall worst case time complexity for
870     /// this routine is `O(m * n^2)`.
871     ///
872     /// Although note that the worst case time here has an upper bound given
873     /// by the `limit` parameter.
874     ///
875     /// # Fallibility
876     ///
877     /// See the corresponding section in the docs for [`Regex::replace_all`]
878     /// for tips on how to deal with a replacement routine that can fail.
879     ///
880     /// # Example
881     ///
882     /// This example shows how to flip the order of whitespace (excluding line
883     /// terminators) delimited fields, and normalizes the whitespace that
884     /// delimits the fields. But we only do it for the first two matches.
885     ///
886     /// ```
887     /// use regex::Regex;
888     ///
889     /// let re = Regex::new(r"(?m)^(\S+)[\s--\r\n]+(\S+)$").unwrap();
890     /// let hay = "
891     /// Greetings  1973
892     /// Wild\t1973
893     /// BornToRun\t\t\t\t1975
894     /// Darkness                    1978
895     /// TheRiver 1980
896     /// ";
897     /// let new = re.replacen(hay, 2, "$2 $1");
898     /// assert_eq!(new, "
899     /// 1973 Greetings
900     /// 1973 Wild
901     /// BornToRun\t\t\t\t1975
902     /// Darkness                    1978
903     /// TheRiver 1980
904     /// ");
905     /// ```
906     #[inline]
replacen<'h, R: Replacer>( &self, haystack: &'h str, limit: usize, mut rep: R, ) -> Cow<'h, str>907     pub fn replacen<'h, R: Replacer>(
908         &self,
909         haystack: &'h str,
910         limit: usize,
911         mut rep: R,
912     ) -> Cow<'h, str> {
913         // If we know that the replacement doesn't have any capture expansions,
914         // then we can use the fast path. The fast path can make a tremendous
915         // difference:
916         //
917         //   1) We use `find_iter` instead of `captures_iter`. Not asking for
918         //      captures generally makes the regex engines faster.
919         //   2) We don't need to look up all of the capture groups and do
920         //      replacements inside the replacement string. We just push it
921         //      at each match and be done with it.
922         if let Some(rep) = rep.no_expansion() {
923             let mut it = self.find_iter(haystack).enumerate().peekable();
924             if it.peek().is_none() {
925                 return Cow::Borrowed(haystack);
926             }
927             let mut new = String::with_capacity(haystack.len());
928             let mut last_match = 0;
929             for (i, m) in it {
930                 new.push_str(&haystack[last_match..m.start()]);
931                 new.push_str(&rep);
932                 last_match = m.end();
933                 if limit > 0 && i >= limit - 1 {
934                     break;
935                 }
936             }
937             new.push_str(&haystack[last_match..]);
938             return Cow::Owned(new);
939         }
940 
941         // The slower path, which we use if the replacement may need access to
942         // capture groups.
943         let mut it = self.captures_iter(haystack).enumerate().peekable();
944         if it.peek().is_none() {
945             return Cow::Borrowed(haystack);
946         }
947         let mut new = String::with_capacity(haystack.len());
948         let mut last_match = 0;
949         for (i, cap) in it {
950             // unwrap on 0 is OK because captures only reports matches
951             let m = cap.get(0).unwrap();
952             new.push_str(&haystack[last_match..m.start()]);
953             rep.replace_append(&cap, &mut new);
954             last_match = m.end();
955             if limit > 0 && i >= limit - 1 {
956                 break;
957             }
958         }
959         new.push_str(&haystack[last_match..]);
960         Cow::Owned(new)
961     }
962 }
963 
964 /// A group of advanced or "lower level" search methods. Some methods permit
965 /// starting the search at a position greater than `0` in the haystack. Other
966 /// methods permit reusing allocations, for example, when extracting the
967 /// matches for capture groups.
968 impl Regex {
969     /// Returns the end byte offset of the first match in the haystack given.
970     ///
971     /// This method may have the same performance characteristics as
    /// `is_match`. Behaviorally, it doesn't just report whether a match
    /// occurs, but also the end offset for a match. In particular, the offset
974     /// returned *may be shorter* than the proper end of the leftmost-first
975     /// match that you would find via [`Regex::find`].
976     ///
977     /// Note that it is not guaranteed that this routine finds the shortest or
978     /// "earliest" possible match. Instead, the main idea of this API is that
979     /// it returns the offset at the point at which the internal regex engine
980     /// has determined that a match has occurred. This may vary depending on
981     /// which internal regex engine is used, and thus, the offset itself may
982     /// change based on internal heuristics.
983     ///
984     /// # Example
985     ///
986     /// Typically, `a+` would match the entire first sequence of `a` in some
987     /// haystack, but `shortest_match` *may* give up as soon as it sees the
988     /// first `a`.
989     ///
990     /// ```
991     /// use regex::Regex;
992     ///
993     /// let re = Regex::new(r"a+").unwrap();
994     /// let offset = re.shortest_match("aaaaa").unwrap();
995     /// assert_eq!(offset, 1);
996     /// ```
997     #[inline]
shortest_match(&self, haystack: &str) -> Option<usize>998     pub fn shortest_match(&self, haystack: &str) -> Option<usize> {
999         self.shortest_match_at(haystack, 0)
1000     }
1001 
1002     /// Returns the same as [`Regex::shortest_match`], but starts the search at
1003     /// the given offset.
1004     ///
1005     /// The significance of the starting point is that it takes the surrounding
1006     /// context into consideration. For example, the `\A` anchor can only match
1007     /// when `start == 0`.
1008     ///
1009     /// If a match is found, the offset returned is relative to the beginning
1010     /// of the haystack, not the beginning of the search.
1011     ///
1012     /// # Panics
1013     ///
1014     /// This panics when `start >= haystack.len() + 1`.
1015     ///
1016     /// # Example
1017     ///
1018     /// This example shows the significance of `start` by demonstrating how it
1019     /// can be used to permit look-around assertions in a regex to take the
1020     /// surrounding context into account.
1021     ///
1022     /// ```
1023     /// use regex::Regex;
1024     ///
1025     /// let re = Regex::new(r"\bchew\b").unwrap();
1026     /// let hay = "eschew";
1027     /// // We get a match here, but it's probably not intended.
1028     /// assert_eq!(re.shortest_match(&hay[2..]), Some(4));
    /// // No match because the assertions take the context into account.
1030     /// assert_eq!(re.shortest_match_at(hay, 2), None);
1031     /// ```
1032     #[inline]
shortest_match_at( &self, haystack: &str, start: usize, ) -> Option<usize>1033     pub fn shortest_match_at(
1034         &self,
1035         haystack: &str,
1036         start: usize,
1037     ) -> Option<usize> {
1038         let input =
1039             Input::new(haystack).earliest(true).span(start..haystack.len());
1040         self.meta.search_half(&input).map(|hm| hm.offset())
1041     }
1042 
1043     /// Returns the same as [`Regex::is_match`], but starts the search at the
1044     /// given offset.
1045     ///
1046     /// The significance of the starting point is that it takes the surrounding
1047     /// context into consideration. For example, the `\A` anchor can only
1048     /// match when `start == 0`.
1049     ///
1050     /// # Panics
1051     ///
1052     /// This panics when `start >= haystack.len() + 1`.
1053     ///
1054     /// # Example
1055     ///
1056     /// This example shows the significance of `start` by demonstrating how it
1057     /// can be used to permit look-around assertions in a regex to take the
1058     /// surrounding context into account.
1059     ///
1060     /// ```
1061     /// use regex::Regex;
1062     ///
1063     /// let re = Regex::new(r"\bchew\b").unwrap();
1064     /// let hay = "eschew";
1065     /// // We get a match here, but it's probably not intended.
1066     /// assert!(re.is_match(&hay[2..]));
    /// // No match because the assertions take the context into account.
1068     /// assert!(!re.is_match_at(hay, 2));
1069     /// ```
1070     #[inline]
is_match_at(&self, haystack: &str, start: usize) -> bool1071     pub fn is_match_at(&self, haystack: &str, start: usize) -> bool {
1072         let input =
1073             Input::new(haystack).earliest(true).span(start..haystack.len());
1074         self.meta.search_half(&input).is_some()
1075     }
1076 
1077     /// Returns the same as [`Regex::find`], but starts the search at the given
1078     /// offset.
1079     ///
1080     /// The significance of the starting point is that it takes the surrounding
1081     /// context into consideration. For example, the `\A` anchor can only
1082     /// match when `start == 0`.
1083     ///
1084     /// # Panics
1085     ///
1086     /// This panics when `start >= haystack.len() + 1`.
1087     ///
1088     /// # Example
1089     ///
1090     /// This example shows the significance of `start` by demonstrating how it
1091     /// can be used to permit look-around assertions in a regex to take the
1092     /// surrounding context into account.
1093     ///
1094     /// ```
1095     /// use regex::Regex;
1096     ///
1097     /// let re = Regex::new(r"\bchew\b").unwrap();
1098     /// let hay = "eschew";
1099     /// // We get a match here, but it's probably not intended.
1100     /// assert_eq!(re.find(&hay[2..]).map(|m| m.range()), Some(0..4));
    /// // No match because the assertions take the context into account.
1102     /// assert_eq!(re.find_at(hay, 2), None);
1103     /// ```
1104     #[inline]
find_at<'h>( &self, haystack: &'h str, start: usize, ) -> Option<Match<'h>>1105     pub fn find_at<'h>(
1106         &self,
1107         haystack: &'h str,
1108         start: usize,
1109     ) -> Option<Match<'h>> {
1110         let input = Input::new(haystack).span(start..haystack.len());
1111         self.meta
1112             .search(&input)
1113             .map(|m| Match::new(haystack, m.start(), m.end()))
1114     }
1115 
1116     /// Returns the same as [`Regex::captures`], but starts the search at the
1117     /// given offset.
1118     ///
1119     /// The significance of the starting point is that it takes the surrounding
1120     /// context into consideration. For example, the `\A` anchor can only
1121     /// match when `start == 0`.
1122     ///
1123     /// # Panics
1124     ///
1125     /// This panics when `start >= haystack.len() + 1`.
1126     ///
1127     /// # Example
1128     ///
1129     /// This example shows the significance of `start` by demonstrating how it
1130     /// can be used to permit look-around assertions in a regex to take the
1131     /// surrounding context into account.
1132     ///
1133     /// ```
1134     /// use regex::Regex;
1135     ///
1136     /// let re = Regex::new(r"\bchew\b").unwrap();
1137     /// let hay = "eschew";
1138     /// // We get a match here, but it's probably not intended.
1139     /// assert_eq!(&re.captures(&hay[2..]).unwrap()[0], "chew");
    /// // No match because the assertions take the context into account.
1141     /// assert!(re.captures_at(hay, 2).is_none());
1142     /// ```
1143     #[inline]
captures_at<'h>( &self, haystack: &'h str, start: usize, ) -> Option<Captures<'h>>1144     pub fn captures_at<'h>(
1145         &self,
1146         haystack: &'h str,
1147         start: usize,
1148     ) -> Option<Captures<'h>> {
1149         let input = Input::new(haystack).span(start..haystack.len());
1150         let mut caps = self.meta.create_captures();
1151         self.meta.search_captures(&input, &mut caps);
1152         if caps.is_match() {
1153             let static_captures_len = self.static_captures_len();
1154             Some(Captures { haystack, caps, static_captures_len })
1155         } else {
1156             None
1157         }
1158     }
1159 
1160     /// This is like [`Regex::captures`], but writes the byte offsets of each
1161     /// capture group match into the locations given.
1162     ///
1163     /// A [`CaptureLocations`] stores the same byte offsets as a [`Captures`],
1164     /// but does *not* store a reference to the haystack. This makes its API
1165     /// a bit lower level and less convenient. But in exchange, callers
1166     /// may allocate their own `CaptureLocations` and reuse it for multiple
1167     /// searches. This may be helpful if allocating a `Captures` shows up in a
1168     /// profile as too costly.
1169     ///
1170     /// To create a `CaptureLocations` value, use the
1171     /// [`Regex::capture_locations`] method.
1172     ///
1173     /// This also returns the overall match if one was found. When a match is
1174     /// found, its offsets are also always stored in `locs` at index `0`.
1175     ///
1176     /// # Panics
1177     ///
1178     /// This routine may panic if the given `CaptureLocations` was not created
1179     /// by this regex.
1180     ///
1181     /// # Example
1182     ///
1183     /// ```
1184     /// use regex::Regex;
1185     ///
1186     /// let re = Regex::new(r"^([a-z]+)=(\S*)$").unwrap();
1187     /// let mut locs = re.capture_locations();
1188     /// assert!(re.captures_read(&mut locs, "id=foo123").is_some());
1189     /// assert_eq!(Some((0, 9)), locs.get(0));
1190     /// assert_eq!(Some((0, 2)), locs.get(1));
1191     /// assert_eq!(Some((3, 9)), locs.get(2));
1192     /// ```
1193     #[inline]
captures_read<'h>( &self, locs: &mut CaptureLocations, haystack: &'h str, ) -> Option<Match<'h>>1194     pub fn captures_read<'h>(
1195         &self,
1196         locs: &mut CaptureLocations,
1197         haystack: &'h str,
1198     ) -> Option<Match<'h>> {
1199         self.captures_read_at(locs, haystack, 0)
1200     }
1201 
1202     /// Returns the same as [`Regex::captures_read`], but starts the search at
1203     /// the given offset.
1204     ///
1205     /// The significance of the starting point is that it takes the surrounding
1206     /// context into consideration. For example, the `\A` anchor can only
1207     /// match when `start == 0`.
1208     ///
1209     /// # Panics
1210     ///
1211     /// This panics when `start >= haystack.len() + 1`.
1212     ///
1213     /// This routine may also panic if the given `CaptureLocations` was not
1214     /// created by this regex.
1215     ///
1216     /// # Example
1217     ///
1218     /// This example shows the significance of `start` by demonstrating how it
1219     /// can be used to permit look-around assertions in a regex to take the
1220     /// surrounding context into account.
1221     ///
1222     /// ```
1223     /// use regex::Regex;
1224     ///
1225     /// let re = Regex::new(r"\bchew\b").unwrap();
1226     /// let hay = "eschew";
1227     /// let mut locs = re.capture_locations();
1228     /// // We get a match here, but it's probably not intended.
1229     /// assert!(re.captures_read(&mut locs, &hay[2..]).is_some());
    /// // No match because the assertions take the context into account.
1231     /// assert!(re.captures_read_at(&mut locs, hay, 2).is_none());
1232     /// ```
1233     #[inline]
captures_read_at<'h>( &self, locs: &mut CaptureLocations, haystack: &'h str, start: usize, ) -> Option<Match<'h>>1234     pub fn captures_read_at<'h>(
1235         &self,
1236         locs: &mut CaptureLocations,
1237         haystack: &'h str,
1238         start: usize,
1239     ) -> Option<Match<'h>> {
1240         let input = Input::new(haystack).span(start..haystack.len());
1241         self.meta.search_captures(&input, &mut locs.0);
1242         locs.0.get_match().map(|m| Match::new(haystack, m.start(), m.end()))
1243     }
1244 
1245     /// An undocumented alias for `captures_read_at`.
1246     ///
1247     /// The `regex-capi` crate previously used this routine, so to avoid
1248     /// breaking that crate, we continue to provide the name as an undocumented
1249     /// alias.
1250     #[doc(hidden)]
1251     #[inline]
read_captures_at<'h>( &self, locs: &mut CaptureLocations, haystack: &'h str, start: usize, ) -> Option<Match<'h>>1252     pub fn read_captures_at<'h>(
1253         &self,
1254         locs: &mut CaptureLocations,
1255         haystack: &'h str,
1256         start: usize,
1257     ) -> Option<Match<'h>> {
1258         self.captures_read_at(locs, haystack, start)
1259     }
1260 }
1261 
1262 /// Auxiliary methods.
1263 impl Regex {
1264     /// Returns the original string of this regex.
1265     ///
1266     /// # Example
1267     ///
1268     /// ```
1269     /// use regex::Regex;
1270     ///
1271     /// let re = Regex::new(r"foo\w+bar").unwrap();
1272     /// assert_eq!(re.as_str(), r"foo\w+bar");
1273     /// ```
1274     #[inline]
as_str(&self) -> &str1275     pub fn as_str(&self) -> &str {
1276         &self.pattern
1277     }
1278 
1279     /// Returns an iterator over the capture names in this regex.
1280     ///
1281     /// The iterator returned yields elements of type `Option<&str>`. That is,
1282     /// the iterator yields values for all capture groups, even ones that are
1283     /// unnamed. The order of the groups corresponds to the order of the group's
1284     /// corresponding opening parenthesis.
1285     ///
1286     /// The first element of the iterator always yields the group corresponding
1287     /// to the overall match, and this group is always unnamed. Therefore, the
1288     /// iterator always yields at least one group.
1289     ///
1290     /// # Example
1291     ///
1292     /// This shows basic usage with a mix of named and unnamed capture groups:
1293     ///
1294     /// ```
1295     /// use regex::Regex;
1296     ///
1297     /// let re = Regex::new(r"(?<a>.(?<b>.))(.)(?:.)(?<c>.)").unwrap();
1298     /// let mut names = re.capture_names();
1299     /// assert_eq!(names.next(), Some(None));
1300     /// assert_eq!(names.next(), Some(Some("a")));
1301     /// assert_eq!(names.next(), Some(Some("b")));
1302     /// assert_eq!(names.next(), Some(None));
1303     /// // the '(?:.)' group is non-capturing and so doesn't appear here!
1304     /// assert_eq!(names.next(), Some(Some("c")));
1305     /// assert_eq!(names.next(), None);
1306     /// ```
1307     ///
1308     /// The iterator always yields at least one element, even for regexes with
1309     /// no capture groups and even for regexes that can never match:
1310     ///
1311     /// ```
1312     /// use regex::Regex;
1313     ///
1314     /// let re = Regex::new(r"").unwrap();
1315     /// let mut names = re.capture_names();
1316     /// assert_eq!(names.next(), Some(None));
1317     /// assert_eq!(names.next(), None);
1318     ///
1319     /// let re = Regex::new(r"[a&&b]").unwrap();
1320     /// let mut names = re.capture_names();
1321     /// assert_eq!(names.next(), Some(None));
1322     /// assert_eq!(names.next(), None);
1323     /// ```
1324     #[inline]
capture_names(&self) -> CaptureNames<'_>1325     pub fn capture_names(&self) -> CaptureNames<'_> {
1326         CaptureNames(self.meta.group_info().pattern_names(PatternID::ZERO))
1327     }
1328 
    /// Returns the number of capture groups in this regex.
1330     ///
1331     /// This includes all named and unnamed groups, including the implicit
1332     /// unnamed group that is always present and corresponds to the entire
1333     /// match.
1334     ///
1335     /// Since the implicit unnamed group is always included in this length, the
1336     /// length returned is guaranteed to be greater than zero.
1337     ///
1338     /// # Example
1339     ///
1340     /// ```
1341     /// use regex::Regex;
1342     ///
1343     /// let re = Regex::new(r"foo").unwrap();
1344     /// assert_eq!(1, re.captures_len());
1345     ///
1346     /// let re = Regex::new(r"(foo)").unwrap();
1347     /// assert_eq!(2, re.captures_len());
1348     ///
1349     /// let re = Regex::new(r"(?<a>.(?<b>.))(.)(?:.)(?<c>.)").unwrap();
1350     /// assert_eq!(5, re.captures_len());
1351     ///
1352     /// let re = Regex::new(r"[a&&b]").unwrap();
1353     /// assert_eq!(1, re.captures_len());
1354     /// ```
1355     #[inline]
captures_len(&self) -> usize1356     pub fn captures_len(&self) -> usize {
1357         self.meta.group_info().group_len(PatternID::ZERO)
1358     }
1359 
1360     /// Returns the total number of capturing groups that appear in every
1361     /// possible match.
1362     ///
1363     /// If the number of capture groups can vary depending on the match, then
1364     /// this returns `None`. That is, a value is only returned when the number
1365     /// of matching groups is invariant or "static."
1366     ///
1367     /// Note that like [`Regex::captures_len`], this **does** include the
1368     /// implicit capturing group corresponding to the entire match. Therefore,
1369     /// when a non-None value is returned, it is guaranteed to be at least `1`.
1370     /// Stated differently, a return value of `Some(0)` is impossible.
1371     ///
1372     /// # Example
1373     ///
1374     /// This shows a few cases where a static number of capture groups is
1375     /// available and a few cases where it is not.
1376     ///
1377     /// ```
1378     /// use regex::Regex;
1379     ///
1380     /// let len = |pattern| {
1381     ///     Regex::new(pattern).map(|re| re.static_captures_len())
1382     /// };
1383     ///
1384     /// assert_eq!(Some(1), len("a")?);
1385     /// assert_eq!(Some(2), len("(a)")?);
1386     /// assert_eq!(Some(2), len("(a)|(b)")?);
1387     /// assert_eq!(Some(3), len("(a)(b)|(c)(d)")?);
1388     /// assert_eq!(None, len("(a)|b")?);
1389     /// assert_eq!(None, len("a|(b)")?);
1390     /// assert_eq!(None, len("(b)*")?);
1391     /// assert_eq!(Some(2), len("(b)+")?);
1392     ///
1393     /// # Ok::<(), Box<dyn std::error::Error>>(())
1394     /// ```
1395     #[inline]
static_captures_len(&self) -> Option<usize>1396     pub fn static_captures_len(&self) -> Option<usize> {
1397         self.meta.static_captures_len()
1398     }
1399 
    /// Returns a freshly allocated set of capture locations that can
1401     /// be reused in multiple calls to [`Regex::captures_read`] or
1402     /// [`Regex::captures_read_at`].
1403     ///
1404     /// The returned locations can be used for any subsequent search for this
1405     /// particular regex. There is no guarantee that it is correct to use for
1406     /// other regexes, even if they have the same number of capture groups.
1407     ///
1408     /// # Example
1409     ///
1410     /// ```
1411     /// use regex::Regex;
1412     ///
1413     /// let re = Regex::new(r"(.)(.)(\w+)").unwrap();
1414     /// let mut locs = re.capture_locations();
1415     /// assert!(re.captures_read(&mut locs, "Padron").is_some());
1416     /// assert_eq!(locs.get(0), Some((0, 6)));
1417     /// assert_eq!(locs.get(1), Some((0, 1)));
1418     /// assert_eq!(locs.get(2), Some((1, 2)));
1419     /// assert_eq!(locs.get(3), Some((2, 6)));
1420     /// ```
1421     #[inline]
capture_locations(&self) -> CaptureLocations1422     pub fn capture_locations(&self) -> CaptureLocations {
1423         CaptureLocations(self.meta.create_captures())
1424     }
1425 
1426     /// An alias for `capture_locations` to preserve backward compatibility.
1427     ///
1428     /// The `regex-capi` crate used this method, so to avoid breaking that
1429     /// crate, we continue to export it as an undocumented API.
1430     #[doc(hidden)]
1431     #[inline]
locations(&self) -> CaptureLocations1432     pub fn locations(&self) -> CaptureLocations {
1433         self.capture_locations()
1434     }
1435 }
1436 
1437 /// Represents a single match of a regex in a haystack.
1438 ///
1439 /// A `Match` contains both the start and end byte offsets of the match and the
1440 /// actual substring corresponding to the range of those byte offsets. It is
1441 /// guaranteed that `start <= end`. When `start == end`, the match is empty.
1442 ///
1443 /// Since this `Match` can only be produced by the top-level `Regex` APIs
1444 /// that only support searching UTF-8 encoded strings, the byte offsets for a
1445 /// `Match` are guaranteed to fall on valid UTF-8 codepoint boundaries. That
1446 /// is, slicing a `&str` with [`Match::range`] is guaranteed to never panic.
1447 ///
1448 /// Values with this type are created by [`Regex::find`] or
1449 /// [`Regex::find_iter`]. Other APIs can create `Match` values too. For
1450 /// example, [`Captures::get`].
1451 ///
/// The lifetime parameter `'h` refers to the lifetime of the haystack that
/// this match was produced from.
1454 ///
1455 /// # Numbering
1456 ///
1457 /// The byte offsets in a `Match` form a half-open interval. That is, the
1458 /// start of the range is inclusive and the end of the range is exclusive.
1459 /// For example, given a haystack `abcFOOxyz` and a match of `FOO`, its byte
1460 /// offset range starts at `3` and ends at `6`. `3` corresponds to `F` and
1461 /// `6` corresponds to `x`, which is one past the end of the match. This
1462 /// corresponds to the same kind of slicing that Rust uses.
1463 ///
1464 /// For more on why this was chosen over other schemes (aside from being
1465 /// consistent with how Rust the language works), see [this discussion] and
1466 /// [Dijkstra's note on a related topic][note].
1467 ///
1468 /// [this discussion]: https://github.com/rust-lang/regex/discussions/866
1469 /// [note]: https://www.cs.utexas.edu/users/EWD/transcriptions/EWD08xx/EWD831.html
1470 ///
1471 /// # Example
1472 ///
1473 /// This example shows the value of each of the methods on `Match` for a
1474 /// particular search.
1475 ///
1476 /// ```
1477 /// use regex::Regex;
1478 ///
1479 /// let re = Regex::new(r"\p{Greek}+").unwrap();
1480 /// let hay = "Greek: αβγδ";
1481 /// let m = re.find(hay).unwrap();
1482 /// assert_eq!(7, m.start());
1483 /// assert_eq!(15, m.end());
1484 /// assert!(!m.is_empty());
1485 /// assert_eq!(8, m.len());
1486 /// assert_eq!(7..15, m.range());
1487 /// assert_eq!("αβγδ", m.as_str());
1488 /// ```
#[derive(Copy, Clone, Eq, PartialEq)]
pub struct Match<'h> {
    /// The haystack that this match was produced from.
    haystack: &'h str,
    /// Byte offset into `haystack` where the match begins (inclusive).
    start: usize,
    /// Byte offset into `haystack` where the match ends (exclusive).
    end: usize,
}
1495 
1496 impl<'h> Match<'h> {
1497     /// Returns the byte offset of the start of the match in the haystack. The
1498     /// start of the match corresponds to the position where the match begins
1499     /// and includes the first byte in the match.
1500     ///
1501     /// It is guaranteed that `Match::start() <= Match::end()`.
1502     ///
1503     /// This is guaranteed to fall on a valid UTF-8 codepoint boundary. That
1504     /// is, it will never be an offset that appears between the UTF-8 code
1505     /// units of a UTF-8 encoded Unicode scalar value. Consequently, it is
1506     /// always safe to slice the corresponding haystack using this offset.
1507     #[inline]
start(&self) -> usize1508     pub fn start(&self) -> usize {
1509         self.start
1510     }
1511 
1512     /// Returns the byte offset of the end of the match in the haystack. The
1513     /// end of the match corresponds to the byte immediately following the last
1514     /// byte in the match. This means that `&slice[start..end]` works as one
1515     /// would expect.
1516     ///
1517     /// It is guaranteed that `Match::start() <= Match::end()`.
1518     ///
1519     /// This is guaranteed to fall on a valid UTF-8 codepoint boundary. That
1520     /// is, it will never be an offset that appears between the UTF-8 code
1521     /// units of a UTF-8 encoded Unicode scalar value. Consequently, it is
1522     /// always safe to slice the corresponding haystack using this offset.
1523     #[inline]
end(&self) -> usize1524     pub fn end(&self) -> usize {
1525         self.end
1526     }
1527 
1528     /// Returns true if and only if this match has a length of zero.
1529     ///
1530     /// Note that an empty match can only occur when the regex itself can
1531     /// match the empty string. Here are some examples of regexes that can
1532     /// all match the empty string: `^`, `^$`, `\b`, `a?`, `a*`, `a{0}`,
1533     /// `(foo|\d+|quux)?`.
1534     #[inline]
is_empty(&self) -> bool1535     pub fn is_empty(&self) -> bool {
1536         self.start == self.end
1537     }
1538 
1539     /// Returns the length, in bytes, of this match.
1540     #[inline]
len(&self) -> usize1541     pub fn len(&self) -> usize {
1542         self.end - self.start
1543     }
1544 
1545     /// Returns the range over the starting and ending byte offsets of the
1546     /// match in the haystack.
1547     ///
1548     /// It is always correct to slice the original haystack searched with this
1549     /// range. That is, because the offsets are guaranteed to fall on valid
1550     /// UTF-8 boundaries, the range returned is always valid.
1551     #[inline]
range(&self) -> core::ops::Range<usize>1552     pub fn range(&self) -> core::ops::Range<usize> {
1553         self.start..self.end
1554     }
1555 
1556     /// Returns the substring of the haystack that matched.
1557     #[inline]
as_str(&self) -> &'h str1558     pub fn as_str(&self) -> &'h str {
1559         &self.haystack[self.range()]
1560     }
1561 
1562     /// Creates a new match from the given haystack and byte offsets.
1563     #[inline]
new(haystack: &'h str, start: usize, end: usize) -> Match<'h>1564     fn new(haystack: &'h str, start: usize, end: usize) -> Match<'h> {
1565         Match { haystack, start, end }
1566     }
1567 }
1568 
1569 impl<'h> core::fmt::Debug for Match<'h> {
fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result1570     fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
1571         f.debug_struct("Match")
1572             .field("start", &self.start)
1573             .field("end", &self.end)
1574             .field("string", &self.as_str())
1575             .finish()
1576     }
1577 }
1578 
1579 impl<'h> From<Match<'h>> for &'h str {
from(m: Match<'h>) -> &'h str1580     fn from(m: Match<'h>) -> &'h str {
1581         m.as_str()
1582     }
1583 }
1584 
1585 impl<'h> From<Match<'h>> for core::ops::Range<usize> {
from(m: Match<'h>) -> core::ops::Range<usize>1586     fn from(m: Match<'h>) -> core::ops::Range<usize> {
1587         m.range()
1588     }
1589 }
1590 
1591 /// Represents the capture groups for a single match.
1592 ///
1593 /// Capture groups refer to parts of a regex enclosed in parentheses. They
1594 /// can be optionally named. The purpose of capture groups is to be able to
1595 /// reference different parts of a match based on the original pattern. In
1596 /// essence, a `Captures` is a container of [`Match`] values for each group
1597 /// that participated in a regex match. Each `Match` can be looked up by either
1598 /// its capture group index or name (if it has one).
1599 ///
1600 /// For example, say you want to match the individual letters in a 5-letter
1601 /// word:
1602 ///
1603 /// ```text
1604 /// (?<first>\w)(\w)(?:\w)\w(?<last>\w)
1605 /// ```
1606 ///
1607 /// This regex has 4 capture groups:
1608 ///
1609 /// * The group at index `0` corresponds to the overall match. It is always
1610 /// present in every match and never has a name.
1611 /// * The group at index `1` with name `first` corresponding to the first
1612 /// letter.
1613 /// * The group at index `2` with no name corresponding to the second letter.
1614 /// * The group at index `3` with name `last` corresponding to the fifth and
1615 /// last letter.
1616 ///
1617 /// Notice that `(?:\w)` was not listed above as a capture group despite it
1618 /// being enclosed in parentheses. That's because `(?:pattern)` is a special
1619 /// syntax that permits grouping but *without* capturing. The reason for not
1620 /// treating it as a capture is that tracking and reporting capture groups
1621 /// requires additional state that may lead to slower searches. So using as few
1622 /// capture groups as possible can help performance. (Although the difference
1623 /// in performance of a couple of capture groups is likely immaterial.)
1624 ///
1625 /// Values with this type are created by [`Regex::captures`] or
1626 /// [`Regex::captures_iter`].
1627 ///
1628 /// `'h` is the lifetime of the haystack that these captures were matched from.
1629 ///
1630 /// # Example
1631 ///
1632 /// ```
1633 /// use regex::Regex;
1634 ///
1635 /// let re = Regex::new(r"(?<first>\w)(\w)(?:\w)\w(?<last>\w)").unwrap();
1636 /// let caps = re.captures("toady").unwrap();
1637 /// assert_eq!("toady", &caps[0]);
1638 /// assert_eq!("t", &caps["first"]);
1639 /// assert_eq!("o", &caps[2]);
1640 /// assert_eq!("y", &caps["last"]);
1641 /// ```
1642 pub struct Captures<'h> {
1643     haystack: &'h str,
1644     caps: captures::Captures,
1645     static_captures_len: Option<usize>,
1646 }
1647 
1648 impl<'h> Captures<'h> {
1649     /// Returns the `Match` associated with the capture group at index `i`. If
1650     /// `i` does not correspond to a capture group, or if the capture group did
1651     /// not participate in the match, then `None` is returned.
1652     ///
1653     /// When `i == 0`, this is guaranteed to return a non-`None` value.
1654     ///
1655     /// # Examples
1656     ///
1657     /// Get the substring that matched with a default of an empty string if the
1658     /// group didn't participate in the match:
1659     ///
1660     /// ```
1661     /// use regex::Regex;
1662     ///
1663     /// let re = Regex::new(r"[a-z]+(?:([0-9]+)|([A-Z]+))").unwrap();
1664     /// let caps = re.captures("abc123").unwrap();
1665     ///
1666     /// let substr1 = caps.get(1).map_or("", |m| m.as_str());
1667     /// let substr2 = caps.get(2).map_or("", |m| m.as_str());
1668     /// assert_eq!(substr1, "123");
1669     /// assert_eq!(substr2, "");
1670     /// ```
1671     #[inline]
get(&self, i: usize) -> Option<Match<'h>>1672     pub fn get(&self, i: usize) -> Option<Match<'h>> {
1673         self.caps
1674             .get_group(i)
1675             .map(|sp| Match::new(self.haystack, sp.start, sp.end))
1676     }
1677 
1678     /// Returns the `Match` associated with the capture group named `name`. If
1679     /// `name` isn't a valid capture group or it refers to a group that didn't
1680     /// match, then `None` is returned.
1681     ///
1682     /// Note that unlike `caps["name"]`, this returns a `Match` whose lifetime
1683     /// matches the lifetime of the haystack in this `Captures` value.
1684     /// Conversely, the substring returned by `caps["name"]` has a lifetime
1685     /// of the `Captures` value, which is likely shorter than the lifetime of
1686     /// the haystack. In some cases, it may be necessary to use this method to
1687     /// access the matching substring instead of the `caps["name"]` notation.
1688     ///
1689     /// # Examples
1690     ///
1691     /// Get the substring that matched with a default of an empty string if the
1692     /// group didn't participate in the match:
1693     ///
1694     /// ```
1695     /// use regex::Regex;
1696     ///
1697     /// let re = Regex::new(
1698     ///     r"[a-z]+(?:(?<numbers>[0-9]+)|(?<letters>[A-Z]+))",
1699     /// ).unwrap();
1700     /// let caps = re.captures("abc123").unwrap();
1701     ///
1702     /// let numbers = caps.name("numbers").map_or("", |m| m.as_str());
1703     /// let letters = caps.name("letters").map_or("", |m| m.as_str());
1704     /// assert_eq!(numbers, "123");
1705     /// assert_eq!(letters, "");
1706     /// ```
1707     #[inline]
name(&self, name: &str) -> Option<Match<'h>>1708     pub fn name(&self, name: &str) -> Option<Match<'h>> {
1709         self.caps
1710             .get_group_by_name(name)
1711             .map(|sp| Match::new(self.haystack, sp.start, sp.end))
1712     }
1713 
1714     /// This is a convenience routine for extracting the substrings
1715     /// corresponding to matching capture groups.
1716     ///
1717     /// This returns a tuple where the first element corresponds to the full
1718     /// substring of the haystack that matched the regex. The second element is
1719     /// an array of substrings, with each corresponding to the to the substring
1720     /// that matched for a particular capture group.
1721     ///
1722     /// # Panics
1723     ///
1724     /// This panics if the number of possible matching groups in this
1725     /// `Captures` value is not fixed to `N` in all circumstances.
1726     /// More precisely, this routine only works when `N` is equivalent to
1727     /// [`Regex::static_captures_len`].
1728     ///
1729     /// Stated more plainly, if the number of matching capture groups in a
1730     /// regex can vary from match to match, then this function always panics.
1731     ///
1732     /// For example, `(a)(b)|(c)` could produce two matching capture groups
1733     /// or one matching capture group for any given match. Therefore, one
1734     /// cannot use `extract` with such a pattern.
1735     ///
1736     /// But a pattern like `(a)(b)|(c)(d)` can be used with `extract` because
1737     /// the number of capture groups in every match is always equivalent,
1738     /// even if the capture _indices_ in each match are not.
1739     ///
1740     /// # Example
1741     ///
1742     /// ```
1743     /// use regex::Regex;
1744     ///
1745     /// let re = Regex::new(r"([0-9]{4})-([0-9]{2})-([0-9]{2})").unwrap();
1746     /// let hay = "On 2010-03-14, I became a Tenneessee lamb.";
1747     /// let Some((full, [year, month, day])) =
1748     ///     re.captures(hay).map(|caps| caps.extract()) else { return };
1749     /// assert_eq!("2010-03-14", full);
1750     /// assert_eq!("2010", year);
1751     /// assert_eq!("03", month);
1752     /// assert_eq!("14", day);
1753     /// ```
1754     ///
1755     /// # Example: iteration
1756     ///
1757     /// This example shows how to use this method when iterating over all
1758     /// `Captures` matches in a haystack.
1759     ///
1760     /// ```
1761     /// use regex::Regex;
1762     ///
1763     /// let re = Regex::new(r"([0-9]{4})-([0-9]{2})-([0-9]{2})").unwrap();
1764     /// let hay = "1973-01-05, 1975-08-25 and 1980-10-18";
1765     ///
1766     /// let mut dates: Vec<(&str, &str, &str)> = vec![];
1767     /// for (_, [y, m, d]) in re.captures_iter(hay).map(|c| c.extract()) {
1768     ///     dates.push((y, m, d));
1769     /// }
1770     /// assert_eq!(dates, vec![
1771     ///     ("1973", "01", "05"),
1772     ///     ("1975", "08", "25"),
1773     ///     ("1980", "10", "18"),
1774     /// ]);
1775     /// ```
1776     ///
1777     /// # Example: parsing different formats
1778     ///
1779     /// This API is particularly useful when you need to extract a particular
1780     /// value that might occur in a different format. Consider, for example,
1781     /// an identifier that might be in double quotes or single quotes:
1782     ///
1783     /// ```
1784     /// use regex::Regex;
1785     ///
1786     /// let re = Regex::new(r#"id:(?:"([^"]+)"|'([^']+)')"#).unwrap();
1787     /// let hay = r#"The first is id:"foo" and the second is id:'bar'."#;
1788     /// let mut ids = vec![];
1789     /// for (_, [id]) in re.captures_iter(hay).map(|c| c.extract()) {
1790     ///     ids.push(id);
1791     /// }
1792     /// assert_eq!(ids, vec!["foo", "bar"]);
1793     /// ```
extract<const N: usize>(&self) -> (&'h str, [&'h str; N])1794     pub fn extract<const N: usize>(&self) -> (&'h str, [&'h str; N]) {
1795         let len = self
1796             .static_captures_len
1797             .expect("number of capture groups can vary in a match")
1798             .checked_sub(1)
1799             .expect("number of groups is always greater than zero");
1800         assert_eq!(N, len, "asked for {} groups, but must ask for {}", N, len);
1801         // The regex-automata variant of extract is a bit more permissive.
1802         // It doesn't require the number of matching capturing groups to be
1803         // static, and you can even request fewer groups than what's there. So
1804         // this is guaranteed to never panic because we've asserted above that
1805         // the user has requested precisely the number of groups that must be
1806         // present in any match for this regex.
1807         self.caps.extract(self.haystack)
1808     }
1809 
1810     /// Expands all instances of `$ref` in `replacement` to the corresponding
1811     /// capture group, and writes them to the `dst` buffer given. A `ref` can
1812     /// be a capture group index or a name. If `ref` doesn't refer to a capture
1813     /// group that participated in the match, then it is replaced with the
1814     /// empty string.
1815     ///
1816     /// # Format
1817     ///
1818     /// The format of the replacement string supports two different kinds of
1819     /// capture references: unbraced and braced.
1820     ///
1821     /// For the unbraced format, the format supported is `$ref` where `name`
1822     /// can be any character in the class `[0-9A-Za-z_]`. `ref` is always
1823     /// the longest possible parse. So for example, `$1a` corresponds to the
1824     /// capture group named `1a` and not the capture group at index `1`. If
1825     /// `ref` matches `^[0-9]+$`, then it is treated as a capture group index
1826     /// itself and not a name.
1827     ///
1828     /// For the braced format, the format supported is `${ref}` where `ref` can
1829     /// be any sequence of bytes except for `}`. If no closing brace occurs,
1830     /// then it is not considered a capture reference. As with the unbraced
1831     /// format, if `ref` matches `^[0-9]+$`, then it is treated as a capture
1832     /// group index and not a name.
1833     ///
1834     /// The braced format is useful for exerting precise control over the name
1835     /// of the capture reference. For example, `${1}a` corresponds to the
1836     /// capture group reference `1` followed by the letter `a`, where as `$1a`
1837     /// (as mentioned above) corresponds to the capture group reference `1a`.
1838     /// The braced format is also useful for expressing capture group names
1839     /// that use characters not supported by the unbraced format. For example,
1840     /// `${foo[bar].baz}` refers to the capture group named `foo[bar].baz`.
1841     ///
1842     /// If a capture group reference is found and it does not refer to a valid
1843     /// capture group, then it will be replaced with the empty string.
1844     ///
1845     /// To write a literal `$`, use `$$`.
1846     ///
1847     /// # Example
1848     ///
1849     /// ```
1850     /// use regex::Regex;
1851     ///
1852     /// let re = Regex::new(
1853     ///     r"(?<day>[0-9]{2})-(?<month>[0-9]{2})-(?<year>[0-9]{4})",
1854     /// ).unwrap();
1855     /// let hay = "On 14-03-2010, I became a Tenneessee lamb.";
1856     /// let caps = re.captures(hay).unwrap();
1857     ///
1858     /// let mut dst = String::new();
1859     /// caps.expand("year=$year, month=$month, day=$day", &mut dst);
1860     /// assert_eq!(dst, "year=2010, month=03, day=14");
1861     /// ```
1862     #[inline]
expand(&self, replacement: &str, dst: &mut String)1863     pub fn expand(&self, replacement: &str, dst: &mut String) {
1864         self.caps.interpolate_string_into(self.haystack, replacement, dst);
1865     }
1866 
1867     /// Returns an iterator over all capture groups. This includes both
1868     /// matching and non-matching groups.
1869     ///
1870     /// The iterator always yields at least one matching group: the first group
1871     /// (at index `0`) with no name. Subsequent groups are returned in the order
1872     /// of their opening parenthesis in the regex.
1873     ///
1874     /// The elements yielded have type `Option<Match<'h>>`, where a non-`None`
1875     /// value is present if the capture group matches.
1876     ///
1877     /// # Example
1878     ///
1879     /// ```
1880     /// use regex::Regex;
1881     ///
1882     /// let re = Regex::new(r"(\w)(\d)?(\w)").unwrap();
1883     /// let caps = re.captures("AZ").unwrap();
1884     ///
1885     /// let mut it = caps.iter();
1886     /// assert_eq!(it.next().unwrap().map(|m| m.as_str()), Some("AZ"));
1887     /// assert_eq!(it.next().unwrap().map(|m| m.as_str()), Some("A"));
1888     /// assert_eq!(it.next().unwrap().map(|m| m.as_str()), None);
1889     /// assert_eq!(it.next().unwrap().map(|m| m.as_str()), Some("Z"));
1890     /// assert_eq!(it.next(), None);
1891     /// ```
1892     #[inline]
iter<'c>(&'c self) -> SubCaptureMatches<'c, 'h>1893     pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 'h> {
1894         SubCaptureMatches { haystack: self.haystack, it: self.caps.iter() }
1895     }
1896 
1897     /// Returns the total number of capture groups. This includes both
1898     /// matching and non-matching groups.
1899     ///
1900     /// The length returned is always equivalent to the number of elements
1901     /// yielded by [`Captures::iter`]. Consequently, the length is always
1902     /// greater than zero since every `Captures` value always includes the
1903     /// match for the entire regex.
1904     ///
1905     /// # Example
1906     ///
1907     /// ```
1908     /// use regex::Regex;
1909     ///
1910     /// let re = Regex::new(r"(\w)(\d)?(\w)").unwrap();
1911     /// let caps = re.captures("AZ").unwrap();
1912     /// assert_eq!(caps.len(), 4);
1913     /// ```
1914     #[inline]
len(&self) -> usize1915     pub fn len(&self) -> usize {
1916         self.caps.group_len()
1917     }
1918 }
1919 
1920 impl<'h> core::fmt::Debug for Captures<'h> {
fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result1921     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1922         /// A little helper type to provide a nice map-like debug
1923         /// representation for our capturing group spans.
1924         ///
1925         /// regex-automata has something similar, but it includes the pattern
1926         /// ID in its debug output, which is confusing. It also doesn't include
1927         /// that strings that match because a regex-automata `Captures` doesn't
1928         /// borrow the haystack.
1929         struct CapturesDebugMap<'a> {
1930             caps: &'a Captures<'a>,
1931         }
1932 
1933         impl<'a> core::fmt::Debug for CapturesDebugMap<'a> {
1934             fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
1935                 let mut map = f.debug_map();
1936                 let names =
1937                     self.caps.caps.group_info().pattern_names(PatternID::ZERO);
1938                 for (group_index, maybe_name) in names.enumerate() {
1939                     let key = Key(group_index, maybe_name);
1940                     match self.caps.get(group_index) {
1941                         None => map.entry(&key, &None::<()>),
1942                         Some(mat) => map.entry(&key, &Value(mat)),
1943                     };
1944                 }
1945                 map.finish()
1946             }
1947         }
1948 
1949         struct Key<'a>(usize, Option<&'a str>);
1950 
1951         impl<'a> core::fmt::Debug for Key<'a> {
1952             fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
1953                 write!(f, "{}", self.0)?;
1954                 if let Some(name) = self.1 {
1955                     write!(f, "/{:?}", name)?;
1956                 }
1957                 Ok(())
1958             }
1959         }
1960 
1961         struct Value<'a>(Match<'a>);
1962 
1963         impl<'a> core::fmt::Debug for Value<'a> {
1964             fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
1965                 write!(
1966                     f,
1967                     "{}..{}/{:?}",
1968                     self.0.start(),
1969                     self.0.end(),
1970                     self.0.as_str()
1971                 )
1972             }
1973         }
1974 
1975         f.debug_tuple("Captures")
1976             .field(&CapturesDebugMap { caps: self })
1977             .finish()
1978     }
1979 }
1980 
1981 /// Get a matching capture group's haystack substring by index.
1982 ///
1983 /// The haystack substring returned can't outlive the `Captures` object if this
1984 /// method is used, because of how `Index` is defined (normally `a[i]` is part
1985 /// of `a` and can't outlive it). To work around this limitation, do that, use
1986 /// [`Captures::get`] instead.
1987 ///
1988 /// `'h` is the lifetime of the matched haystack, but the lifetime of the
1989 /// `&str` returned by this implementation is the lifetime of the `Captures`
1990 /// value itself.
1991 ///
1992 /// # Panics
1993 ///
1994 /// If there is no matching group at the given index.
1995 impl<'h> core::ops::Index<usize> for Captures<'h> {
1996     type Output = str;
1997 
1998     // The lifetime is written out to make it clear that the &str returned
1999     // does NOT have a lifetime equivalent to 'h.
index<'a>(&'a self, i: usize) -> &'a str2000     fn index<'a>(&'a self, i: usize) -> &'a str {
2001         self.get(i)
2002             .map(|m| m.as_str())
2003             .unwrap_or_else(|| panic!("no group at index '{}'", i))
2004     }
2005 }
2006 
2007 /// Get a matching capture group's haystack substring by name.
2008 ///
2009 /// The haystack substring returned can't outlive the `Captures` object if this
2010 /// method is used, because of how `Index` is defined (normally `a[i]` is part
2011 /// of `a` and can't outlive it). To work around this limitation, do that, use
2012 /// [`Captures::name`] instead.
2013 ///
2014 /// `'h` is the lifetime of the matched haystack, but the lifetime of the
2015 /// `&str` returned by this implementation is the lifetime of the `Captures`
2016 /// value itself.
2017 ///
2018 /// `'n` is the lifetime of the group name used to index the `Captures` value.
2019 ///
2020 /// # Panics
2021 ///
2022 /// If there is no matching group at the given name.
2023 impl<'h, 'n> core::ops::Index<&'n str> for Captures<'h> {
2024     type Output = str;
2025 
index<'a>(&'a self, name: &'n str) -> &'a str2026     fn index<'a>(&'a self, name: &'n str) -> &'a str {
2027         self.name(name)
2028             .map(|m| m.as_str())
2029             .unwrap_or_else(|| panic!("no group named '{}'", name))
2030     }
2031 }
2032 
2033 /// A low level representation of the byte offsets of each capture group.
2034 ///
2035 /// You can think of this as a lower level [`Captures`], where this type does
2036 /// not support named capturing groups directly and it does not borrow the
2037 /// haystack that these offsets were matched on.
2038 ///
2039 /// Primarily, this type is useful when using the lower level `Regex` APIs such
2040 /// as [`Regex::captures_read`], which permits amortizing the allocation in
2041 /// which capture match offsets are stored.
2042 ///
2043 /// In order to build a value of this type, you'll need to call the
2044 /// [`Regex::capture_locations`] method. The value returned can then be reused
2045 /// in subsequent searches for that regex. Using it for other regexes may
2046 /// result in a panic or otherwise incorrect results.
2047 ///
2048 /// # Example
2049 ///
2050 /// This example shows how to create and use `CaptureLocations` in a search.
2051 ///
2052 /// ```
2053 /// use regex::Regex;
2054 ///
2055 /// let re = Regex::new(r"(?<first>\w+)\s+(?<last>\w+)").unwrap();
2056 /// let mut locs = re.capture_locations();
2057 /// let m = re.captures_read(&mut locs, "Bruce Springsteen").unwrap();
2058 /// assert_eq!(0..17, m.range());
2059 /// assert_eq!(Some((0, 17)), locs.get(0));
2060 /// assert_eq!(Some((0, 5)), locs.get(1));
2061 /// assert_eq!(Some((6, 17)), locs.get(2));
2062 ///
2063 /// // Asking for an invalid capture group always returns None.
2064 /// assert_eq!(None, locs.get(3));
2065 /// # // literals are too big for 32-bit usize: #1041
2066 /// # #[cfg(target_pointer_width = "64")]
2067 /// assert_eq!(None, locs.get(34973498648));
2068 /// # #[cfg(target_pointer_width = "64")]
2069 /// assert_eq!(None, locs.get(9944060567225171988));
2070 /// ```
2071 #[derive(Clone, Debug)]
2072 pub struct CaptureLocations(captures::Captures);
2073 
2074 /// A type alias for `CaptureLocations` for backwards compatibility.
2075 ///
2076 /// Previously, we exported `CaptureLocations` as `Locations` in an
2077 /// undocumented API. To prevent breaking that code (e.g., in `regex-capi`),
2078 /// we continue re-exporting the same undocumented API.
2079 #[doc(hidden)]
2080 pub type Locations = CaptureLocations;
2081 
2082 impl CaptureLocations {
2083     /// Returns the start and end byte offsets of the capture group at index
2084     /// `i`. This returns `None` if `i` is not a valid capture group or if the
2085     /// capture group did not match.
2086     ///
2087     /// # Example
2088     ///
2089     /// ```
2090     /// use regex::Regex;
2091     ///
2092     /// let re = Regex::new(r"(?<first>\w+)\s+(?<last>\w+)").unwrap();
2093     /// let mut locs = re.capture_locations();
2094     /// re.captures_read(&mut locs, "Bruce Springsteen").unwrap();
2095     /// assert_eq!(Some((0, 17)), locs.get(0));
2096     /// assert_eq!(Some((0, 5)), locs.get(1));
2097     /// assert_eq!(Some((6, 17)), locs.get(2));
2098     /// ```
2099     #[inline]
get(&self, i: usize) -> Option<(usize, usize)>2100     pub fn get(&self, i: usize) -> Option<(usize, usize)> {
2101         self.0.get_group(i).map(|sp| (sp.start, sp.end))
2102     }
2103 
2104     /// Returns the total number of capture groups (even if they didn't match).
2105     /// That is, the length returned is unaffected by the result of a search.
2106     ///
2107     /// This is always at least `1` since every regex has at least `1`
2108     /// capturing group that corresponds to the entire match.
2109     ///
2110     /// # Example
2111     ///
2112     /// ```
2113     /// use regex::Regex;
2114     ///
2115     /// let re = Regex::new(r"(?<first>\w+)\s+(?<last>\w+)").unwrap();
2116     /// let mut locs = re.capture_locations();
2117     /// assert_eq!(3, locs.len());
2118     /// re.captures_read(&mut locs, "Bruce Springsteen").unwrap();
2119     /// assert_eq!(3, locs.len());
2120     /// ```
2121     ///
2122     /// Notice that the length is always at least `1`, regardless of the regex:
2123     ///
2124     /// ```
2125     /// use regex::Regex;
2126     ///
2127     /// let re = Regex::new(r"").unwrap();
2128     /// let locs = re.capture_locations();
2129     /// assert_eq!(1, locs.len());
2130     ///
2131     /// // [a&&b] is a regex that never matches anything.
2132     /// let re = Regex::new(r"[a&&b]").unwrap();
2133     /// let locs = re.capture_locations();
2134     /// assert_eq!(1, locs.len());
2135     /// ```
2136     #[inline]
len(&self) -> usize2137     pub fn len(&self) -> usize {
2138         // self.0.group_len() returns 0 if the underlying captures doesn't
2139         // represent a match, but the behavior guaranteed for this method is
2140         // that the length doesn't change based on a match or not.
2141         self.0.group_info().group_len(PatternID::ZERO)
2142     }
2143 
2144     /// An alias for the `get` method for backwards compatibility.
2145     ///
2146     /// Previously, we exported `get` as `pos` in an undocumented API. To
2147     /// prevent breaking that code (e.g., in `regex-capi`), we continue
2148     /// re-exporting the same undocumented API.
2149     #[doc(hidden)]
2150     #[inline]
pos(&self, i: usize) -> Option<(usize, usize)>2151     pub fn pos(&self, i: usize) -> Option<(usize, usize)> {
2152         self.get(i)
2153     }
2154 }
2155 
2156 /// An iterator over all non-overlapping matches in a haystack.
2157 ///
2158 /// This iterator yields [`Match`] values. The iterator stops when no more
2159 /// matches can be found.
2160 ///
2161 /// `'r` is the lifetime of the compiled regular expression and `'h` is the
2162 /// lifetime of the haystack.
2163 ///
2164 /// This iterator is created by [`Regex::find_iter`].
2165 ///
2166 /// # Time complexity
2167 ///
2168 /// Note that since an iterator runs potentially many searches on the haystack
2169 /// and since each search has worst case `O(m * n)` time complexity, the
2170 /// overall worst case time complexity for iteration is `O(m * n^2)`.
2171 #[derive(Debug)]
2172 pub struct Matches<'r, 'h> {
2173     haystack: &'h str,
2174     it: meta::FindMatches<'r, 'h>,
2175 }
2176 
2177 impl<'r, 'h> Iterator for Matches<'r, 'h> {
2178     type Item = Match<'h>;
2179 
2180     #[inline]
next(&mut self) -> Option<Match<'h>>2181     fn next(&mut self) -> Option<Match<'h>> {
2182         self.it
2183             .next()
2184             .map(|sp| Match::new(self.haystack, sp.start(), sp.end()))
2185     }
2186 
2187     #[inline]
count(self) -> usize2188     fn count(self) -> usize {
2189         // This can actually be up to 2x faster than calling `next()` until
2190         // completion, because counting matches when using a DFA only requires
2191         // finding the end of each match. But returning a `Match` via `next()`
2192         // requires the start of each match which, with a DFA, requires a
2193         // reverse forward scan to find it.
2194         self.it.count()
2195     }
2196 }
2197 
2198 impl<'r, 'h> core::iter::FusedIterator for Matches<'r, 'h> {}
2199 
2200 /// An iterator over all non-overlapping capture matches in a haystack.
2201 ///
2202 /// This iterator yields [`Captures`] values. The iterator stops when no more
2203 /// matches can be found.
2204 ///
2205 /// `'r` is the lifetime of the compiled regular expression and `'h` is the
2206 /// lifetime of the matched string.
2207 ///
2208 /// This iterator is created by [`Regex::captures_iter`].
2209 ///
2210 /// # Time complexity
2211 ///
2212 /// Note that since an iterator runs potentially many searches on the haystack
2213 /// and since each search has worst case `O(m * n)` time complexity, the
2214 /// overall worst case time complexity for iteration is `O(m * n^2)`.
2215 #[derive(Debug)]
2216 pub struct CaptureMatches<'r, 'h> {
2217     haystack: &'h str,
2218     it: meta::CapturesMatches<'r, 'h>,
2219 }
2220 
2221 impl<'r, 'h> Iterator for CaptureMatches<'r, 'h> {
2222     type Item = Captures<'h>;
2223 
2224     #[inline]
next(&mut self) -> Option<Captures<'h>>2225     fn next(&mut self) -> Option<Captures<'h>> {
2226         let static_captures_len = self.it.regex().static_captures_len();
2227         self.it.next().map(|caps| Captures {
2228             haystack: self.haystack,
2229             caps,
2230             static_captures_len,
2231         })
2232     }
2233 
2234     #[inline]
count(self) -> usize2235     fn count(self) -> usize {
2236         // This can actually be up to 2x faster than calling `next()` until
2237         // completion, because counting matches when using a DFA only requires
2238         // finding the end of each match. But returning a `Match` via `next()`
2239         // requires the start of each match which, with a DFA, requires a
2240         // reverse forward scan to find it.
2241         self.it.count()
2242     }
2243 }
2244 
2245 impl<'r, 'h> core::iter::FusedIterator for CaptureMatches<'r, 'h> {}
2246 
2247 /// An iterator over all substrings delimited by a regex match.
2248 ///
2249 /// `'r` is the lifetime of the compiled regular expression and `'h` is the
2250 /// lifetime of the byte string being split.
2251 ///
2252 /// This iterator is created by [`Regex::split`].
2253 ///
2254 /// # Time complexity
2255 ///
2256 /// Note that since an iterator runs potentially many searches on the haystack
2257 /// and since each search has worst case `O(m * n)` time complexity, the
2258 /// overall worst case time complexity for iteration is `O(m * n^2)`.
2259 #[derive(Debug)]
2260 pub struct Split<'r, 'h> {
2261     haystack: &'h str,
2262     it: meta::Split<'r, 'h>,
2263 }
2264 
2265 impl<'r, 'h> Iterator for Split<'r, 'h> {
2266     type Item = &'h str;
2267 
2268     #[inline]
next(&mut self) -> Option<&'h str>2269     fn next(&mut self) -> Option<&'h str> {
2270         self.it.next().map(|span| &self.haystack[span])
2271     }
2272 }
2273 
2274 impl<'r, 'h> core::iter::FusedIterator for Split<'r, 'h> {}
2275 
2276 /// An iterator over at most `N` substrings delimited by a regex match.
2277 ///
2278 /// The last substring yielded by this iterator will be whatever remains after
2279 /// `N-1` splits.
2280 ///
2281 /// `'r` is the lifetime of the compiled regular expression and `'h` is the
2282 /// lifetime of the byte string being split.
2283 ///
2284 /// This iterator is created by [`Regex::splitn`].
2285 ///
2286 /// # Time complexity
2287 ///
2288 /// Note that since an iterator runs potentially many searches on the haystack
2289 /// and since each search has worst case `O(m * n)` time complexity, the
2290 /// overall worst case time complexity for iteration is `O(m * n^2)`.
2291 ///
2292 /// Although note that the worst case time here has an upper bound given
2293 /// by the `limit` parameter to [`Regex::splitn`].
2294 #[derive(Debug)]
2295 pub struct SplitN<'r, 'h> {
2296     haystack: &'h str,
2297     it: meta::SplitN<'r, 'h>,
2298 }
2299 
2300 impl<'r, 'h> Iterator for SplitN<'r, 'h> {
2301     type Item = &'h str;
2302 
2303     #[inline]
next(&mut self) -> Option<&'h str>2304     fn next(&mut self) -> Option<&'h str> {
2305         self.it.next().map(|span| &self.haystack[span])
2306     }
2307 
2308     #[inline]
size_hint(&self) -> (usize, Option<usize>)2309     fn size_hint(&self) -> (usize, Option<usize>) {
2310         self.it.size_hint()
2311     }
2312 }
2313 
2314 impl<'r, 'h> core::iter::FusedIterator for SplitN<'r, 'h> {}
2315 
2316 /// An iterator over the names of all capture groups in a regex.
2317 ///
2318 /// This iterator yields values of type `Option<&str>` in order of the opening
2319 /// capture group parenthesis in the regex pattern. `None` is yielded for
2320 /// groups with no name. The first element always corresponds to the implicit
2321 /// and unnamed group for the overall match.
2322 ///
2323 /// `'r` is the lifetime of the compiled regular expression.
2324 ///
2325 /// This iterator is created by [`Regex::capture_names`].
2326 #[derive(Clone, Debug)]
2327 pub struct CaptureNames<'r>(captures::GroupInfoPatternNames<'r>);
2328 
2329 impl<'r> Iterator for CaptureNames<'r> {
2330     type Item = Option<&'r str>;
2331 
2332     #[inline]
next(&mut self) -> Option<Option<&'r str>>2333     fn next(&mut self) -> Option<Option<&'r str>> {
2334         self.0.next()
2335     }
2336 
2337     #[inline]
size_hint(&self) -> (usize, Option<usize>)2338     fn size_hint(&self) -> (usize, Option<usize>) {
2339         self.0.size_hint()
2340     }
2341 
2342     #[inline]
count(self) -> usize2343     fn count(self) -> usize {
2344         self.0.count()
2345     }
2346 }
2347 
2348 impl<'r> ExactSizeIterator for CaptureNames<'r> {}
2349 
2350 impl<'r> core::iter::FusedIterator for CaptureNames<'r> {}
2351 
2352 /// An iterator over all group matches in a [`Captures`] value.
2353 ///
2354 /// This iterator yields values of type `Option<Match<'h>>`, where `'h` is the
2355 /// lifetime of the haystack that the matches are for. The order of elements
2356 /// yielded corresponds to the order of the opening parenthesis for the group
2357 /// in the regex pattern. `None` is yielded for groups that did not participate
2358 /// in the match.
2359 ///
2360 /// The first element always corresponds to the implicit group for the overall
2361 /// match. Since this iterator is created by a [`Captures`] value, and a
2362 /// `Captures` value is only created when a match occurs, it follows that the
2363 /// first element yielded by this iterator is guaranteed to be non-`None`.
2364 ///
2365 /// The lifetime `'c` corresponds to the lifetime of the `Captures` value that
2366 /// created this iterator, and the lifetime `'h` corresponds to the originally
2367 /// matched haystack.
2368 #[derive(Clone, Debug)]
2369 pub struct SubCaptureMatches<'c, 'h> {
2370     haystack: &'h str,
2371     it: captures::CapturesPatternIter<'c>,
2372 }
2373 
2374 impl<'c, 'h> Iterator for SubCaptureMatches<'c, 'h> {
2375     type Item = Option<Match<'h>>;
2376 
2377     #[inline]
next(&mut self) -> Option<Option<Match<'h>>>2378     fn next(&mut self) -> Option<Option<Match<'h>>> {
2379         self.it.next().map(|group| {
2380             group.map(|sp| Match::new(self.haystack, sp.start, sp.end))
2381         })
2382     }
2383 
2384     #[inline]
size_hint(&self) -> (usize, Option<usize>)2385     fn size_hint(&self) -> (usize, Option<usize>) {
2386         self.it.size_hint()
2387     }
2388 
2389     #[inline]
count(self) -> usize2390     fn count(self) -> usize {
2391         self.it.count()
2392     }
2393 }
2394 
2395 impl<'c, 'h> ExactSizeIterator for SubCaptureMatches<'c, 'h> {}
2396 
2397 impl<'c, 'h> core::iter::FusedIterator for SubCaptureMatches<'c, 'h> {}
2398 
2399 /// A trait for types that can be used to replace matches in a haystack.
2400 ///
2401 /// In general, users of this crate shouldn't need to implement this trait,
2402 /// since implementations are already provided for `&str` along with other
2403 /// variants of string types, as well as `FnMut(&Captures) -> String` (or any
2404 /// `FnMut(&Captures) -> T` where `T: AsRef<str>`). Those cover most use cases,
2405 /// but callers can implement this trait directly if necessary.
2406 ///
2407 /// # Example
2408 ///
2409 /// This example shows a basic implementation of  the `Replacer` trait. This
2410 /// can be done much more simply using the replacement string interpolation
2411 /// support (e.g., `$first $last`), but this approach avoids needing to parse
2412 /// the replacement string at all.
2413 ///
2414 /// ```
2415 /// use regex::{Captures, Regex, Replacer};
2416 ///
2417 /// struct NameSwapper;
2418 ///
2419 /// impl Replacer for NameSwapper {
2420 ///     fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
2421 ///         dst.push_str(&caps["first"]);
2422 ///         dst.push_str(" ");
2423 ///         dst.push_str(&caps["last"]);
2424 ///     }
2425 /// }
2426 ///
2427 /// let re = Regex::new(r"(?<last>[^,\s]+),\s+(?<first>\S+)").unwrap();
2428 /// let result = re.replace("Springsteen, Bruce", NameSwapper);
2429 /// assert_eq!(result, "Bruce Springsteen");
2430 /// ```
2431 pub trait Replacer {
2432     /// Appends possibly empty data to `dst` to replace the current match.
2433     ///
2434     /// The current match is represented by `caps`, which is guaranteed to
2435     /// have a match at capture group `0`.
2436     ///
2437     /// For example, a no-op replacement would be `dst.push_str(&caps[0])`.
replace_append(&mut self, caps: &Captures<'_>, dst: &mut String)2438     fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String);
2439 
2440     /// Return a fixed unchanging replacement string.
2441     ///
2442     /// When doing replacements, if access to [`Captures`] is not needed (e.g.,
2443     /// the replacement string does not need `$` expansion), then it can be
2444     /// beneficial to avoid finding sub-captures.
2445     ///
2446     /// In general, this is called once for every call to a replacement routine
2447     /// such as [`Regex::replace_all`].
no_expansion<'r>(&'r mut self) -> Option<Cow<'r, str>>2448     fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, str>> {
2449         None
2450     }
2451 
2452     /// Returns a type that implements `Replacer`, but that borrows and wraps
2453     /// this `Replacer`.
2454     ///
2455     /// This is useful when you want to take a generic `Replacer` (which might
2456     /// not be cloneable) and use it without consuming it, so it can be used
2457     /// more than once.
2458     ///
2459     /// # Example
2460     ///
2461     /// ```
2462     /// use regex::{Regex, Replacer};
2463     ///
2464     /// fn replace_all_twice<R: Replacer>(
2465     ///     re: Regex,
2466     ///     src: &str,
2467     ///     mut rep: R,
2468     /// ) -> String {
2469     ///     let dst = re.replace_all(src, rep.by_ref());
2470     ///     let dst = re.replace_all(&dst, rep.by_ref());
2471     ///     dst.into_owned()
2472     /// }
2473     /// ```
by_ref<'r>(&'r mut self) -> ReplacerRef<'r, Self>2474     fn by_ref<'r>(&'r mut self) -> ReplacerRef<'r, Self> {
2475         ReplacerRef(self)
2476     }
2477 }
2478 
2479 impl<'a> Replacer for &'a str {
replace_append(&mut self, caps: &Captures<'_>, dst: &mut String)2480     fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
2481         caps.expand(*self, dst);
2482     }
2483 
no_expansion(&mut self) -> Option<Cow<'_, str>>2484     fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
2485         no_expansion(self)
2486     }
2487 }
2488 
2489 impl<'a> Replacer for &'a String {
replace_append(&mut self, caps: &Captures<'_>, dst: &mut String)2490     fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
2491         self.as_str().replace_append(caps, dst)
2492     }
2493 
no_expansion(&mut self) -> Option<Cow<'_, str>>2494     fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
2495         no_expansion(self)
2496     }
2497 }
2498 
2499 impl Replacer for String {
replace_append(&mut self, caps: &Captures<'_>, dst: &mut String)2500     fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
2501         self.as_str().replace_append(caps, dst)
2502     }
2503 
no_expansion(&mut self) -> Option<Cow<'_, str>>2504     fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
2505         no_expansion(self)
2506     }
2507 }
2508 
2509 impl<'a> Replacer for Cow<'a, str> {
replace_append(&mut self, caps: &Captures<'_>, dst: &mut String)2510     fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
2511         self.as_ref().replace_append(caps, dst)
2512     }
2513 
no_expansion(&mut self) -> Option<Cow<'_, str>>2514     fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
2515         no_expansion(self)
2516     }
2517 }
2518 
2519 impl<'a> Replacer for &'a Cow<'a, str> {
replace_append(&mut self, caps: &Captures<'_>, dst: &mut String)2520     fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
2521         self.as_ref().replace_append(caps, dst)
2522     }
2523 
no_expansion(&mut self) -> Option<Cow<'_, str>>2524     fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
2525         no_expansion(self)
2526     }
2527 }
2528 
2529 impl<F, T> Replacer for F
2530 where
2531     F: FnMut(&Captures<'_>) -> T,
2532     T: AsRef<str>,
2533 {
replace_append(&mut self, caps: &Captures<'_>, dst: &mut String)2534     fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
2535         dst.push_str((*self)(caps).as_ref());
2536     }
2537 }
2538 
2539 /// A by-reference adaptor for a [`Replacer`].
2540 ///
2541 /// This permits reusing the same `Replacer` value in multiple calls to a
2542 /// replacement routine like [`Regex::replace_all`].
2543 ///
2544 /// This type is created by [`Replacer::by_ref`].
2545 #[derive(Debug)]
2546 pub struct ReplacerRef<'a, R: ?Sized>(&'a mut R);
2547 
2548 impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> {
replace_append(&mut self, caps: &Captures<'_>, dst: &mut String)2549     fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
2550         self.0.replace_append(caps, dst)
2551     }
2552 
no_expansion(&mut self) -> Option<Cow<'_, str>>2553     fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
2554         self.0.no_expansion()
2555     }
2556 }
2557 
2558 /// A helper type for forcing literal string replacement.
2559 ///
2560 /// It can be used with routines like [`Regex::replace`] and
2561 /// [`Regex::replace_all`] to do a literal string replacement without expanding
2562 /// `$name` to their corresponding capture groups. This can be both convenient
2563 /// (to avoid escaping `$`, for example) and faster (since capture groups
2564 /// don't need to be found).
2565 ///
2566 /// `'s` is the lifetime of the literal string to use.
2567 ///
2568 /// # Example
2569 ///
2570 /// ```
2571 /// use regex::{NoExpand, Regex};
2572 ///
2573 /// let re = Regex::new(r"(?<last>[^,\s]+),\s+(\S+)").unwrap();
2574 /// let result = re.replace("Springsteen, Bruce", NoExpand("$2 $last"));
2575 /// assert_eq!(result, "$2 $last");
2576 /// ```
2577 #[derive(Clone, Debug)]
2578 pub struct NoExpand<'s>(pub &'s str);
2579 
2580 impl<'s> Replacer for NoExpand<'s> {
replace_append(&mut self, _: &Captures<'_>, dst: &mut String)2581     fn replace_append(&mut self, _: &Captures<'_>, dst: &mut String) {
2582         dst.push_str(self.0);
2583     }
2584 
no_expansion(&mut self) -> Option<Cow<'_, str>>2585     fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
2586         Some(Cow::Borrowed(self.0))
2587     }
2588 }
2589 
2590 /// Quickly checks the given replacement string for whether interpolation
2591 /// should be done on it. It returns `None` if a `$` was found anywhere in the
2592 /// given string, which suggests interpolation needs to be done. But if there's
2593 /// no `$` anywhere, then interpolation definitely does not need to be done. In
2594 /// that case, the given string is returned as a borrowed `Cow`.
2595 ///
2596 /// This is meant to be used to implement the `Replacer::no_expandsion` method
2597 /// in its various trait impls.
no_expansion<T: AsRef<str>>(replacement: &T) -> Option<Cow<'_, str>>2598 fn no_expansion<T: AsRef<str>>(replacement: &T) -> Option<Cow<'_, str>> {
2599     let replacement = replacement.as_ref();
2600     match crate::find_byte::find_byte(b'$', replacement.as_bytes()) {
2601         Some(_) => None,
2602         None => Some(Cow::Borrowed(replacement)),
2603     }
2604 }
2605