1 /*!
2 This module defines 256-bit vector implementations of `memchr` and friends.
3 
4 The main types in this module are [`One`], [`Two`] and [`Three`]. They are for
5 searching for one, two or three distinct bytes, respectively, in a haystack.
6 Each type also has corresponding double ended iterators. These searchers are
7 typically much faster than scalar routines accomplishing the same task.
8 
9 The `One` searcher also provides a [`One::count`] routine for efficiently
10 counting the number of times a single byte occurs in a haystack. This is
11 useful, for example, for counting the number of lines in a haystack. This
12 routine exists because it is usually faster, especially with a high match
13 count, then using [`One::find`] repeatedly. ([`OneIter`] specializes its
14 `Iterator::count` implementation to use this routine.)
15 
16 Only one, two and three bytes are supported because three bytes is about
17 the point where one sees diminishing returns. Beyond this point and it's
18 probably (but not necessarily) better to just use a simple `[bool; 256]` array
19 or similar. However, it depends mightily on the specific work-load and the
20 expected match frequency.
21 */
22 
23 use core::arch::x86_64::{__m128i, __m256i};
24 
25 use crate::{arch::generic::memchr as generic, ext::Pointer, vector::Vector};
26 
27 /// Finds all occurrences of a single byte in a haystack.
28 #[derive(Clone, Copy, Debug)]
29 pub struct One {
30     /// Used for haystacks less than 32 bytes.
31     sse2: generic::One<__m128i>,
32     /// Used for haystacks bigger than 32 bytes.
33     avx2: generic::One<__m256i>,
34 }
35 
36 impl One {
37     /// Create a new searcher that finds occurrences of the needle byte given.
38     ///
39     /// This particular searcher is specialized to use AVX2 vector instructions
40     /// that typically make it quite fast. (SSE2 is used for haystacks that
41     /// are too short to accommodate an AVX2 vector.)
42     ///
43     /// If either SSE2 or AVX2 is unavailable in the current environment, then
44     /// `None` is returned.
45     #[inline]
new(needle: u8) -> Option<One>46     pub fn new(needle: u8) -> Option<One> {
47         if One::is_available() {
48             // SAFETY: we check that sse2 and avx2 are available above.
49             unsafe { Some(One::new_unchecked(needle)) }
50         } else {
51             None
52         }
53     }
54 
55     /// Create a new finder specific to AVX2 vectors and routines without
56     /// checking that either SSE2 or AVX2 is available.
57     ///
58     /// # Safety
59     ///
60     /// Callers must guarantee that it is safe to execute both `sse2` and
61     /// `avx2` instructions in the current environment.
62     ///
63     /// Note that it is a common misconception that if one compiles for an
64     /// `x86_64` target, then they therefore automatically have access to SSE2
65     /// instructions. While this is almost always the case, it isn't true in
66     /// 100% of cases.
67     #[target_feature(enable = "sse2", enable = "avx2")]
68     #[inline]
new_unchecked(needle: u8) -> One69     pub unsafe fn new_unchecked(needle: u8) -> One {
70         One {
71             sse2: generic::One::new(needle),
72             avx2: generic::One::new(needle),
73         }
74     }
75 
76     /// Returns true when this implementation is available in the current
77     /// environment.
78     ///
79     /// When this is true, it is guaranteed that [`One::new`] will return
80     /// a `Some` value. Similarly, when it is false, it is guaranteed that
81     /// `One::new` will return a `None` value.
82     ///
83     /// Note also that for the lifetime of a single program, if this returns
84     /// true then it will always return true.
85     #[inline]
is_available() -> bool86     pub fn is_available() -> bool {
87         #[cfg(not(target_feature = "sse2"))]
88         {
89             false
90         }
91         #[cfg(target_feature = "sse2")]
92         {
93             #[cfg(target_feature = "avx2")]
94             {
95                 true
96             }
97             #[cfg(not(target_feature = "avx2"))]
98             {
99                 #[cfg(feature = "std")]
100                 {
101                     std::is_x86_feature_detected!("avx2")
102                 }
103                 #[cfg(not(feature = "std"))]
104                 {
105                     false
106                 }
107             }
108         }
109     }
110 
111     /// Return the first occurrence of one of the needle bytes in the given
112     /// haystack. If no such occurrence exists, then `None` is returned.
113     ///
114     /// The occurrence is reported as an offset into `haystack`. Its maximum
115     /// value is `haystack.len() - 1`.
116     #[inline]
find(&self, haystack: &[u8]) -> Option<usize>117     pub fn find(&self, haystack: &[u8]) -> Option<usize> {
118         // SAFETY: `find_raw` guarantees that if a pointer is returned, it
119         // falls within the bounds of the start and end pointers.
120         unsafe {
121             generic::search_slice_with_raw(haystack, |s, e| {
122                 self.find_raw(s, e)
123             })
124         }
125     }
126 
127     /// Return the last occurrence of one of the needle bytes in the given
128     /// haystack. If no such occurrence exists, then `None` is returned.
129     ///
130     /// The occurrence is reported as an offset into `haystack`. Its maximum
131     /// value is `haystack.len() - 1`.
132     #[inline]
rfind(&self, haystack: &[u8]) -> Option<usize>133     pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
134         // SAFETY: `find_raw` guarantees that if a pointer is returned, it
135         // falls within the bounds of the start and end pointers.
136         unsafe {
137             generic::search_slice_with_raw(haystack, |s, e| {
138                 self.rfind_raw(s, e)
139             })
140         }
141     }
142 
143     /// Counts all occurrences of this byte in the given haystack.
144     #[inline]
count(&self, haystack: &[u8]) -> usize145     pub fn count(&self, haystack: &[u8]) -> usize {
146         // SAFETY: All of our pointers are derived directly from a borrowed
147         // slice, which is guaranteed to be valid.
148         unsafe {
149             let start = haystack.as_ptr();
150             let end = start.add(haystack.len());
151             self.count_raw(start, end)
152         }
153     }
154 
155     /// Like `find`, but accepts and returns raw pointers.
156     ///
157     /// When a match is found, the pointer returned is guaranteed to be
158     /// `>= start` and `< end`.
159     ///
160     /// This routine is useful if you're already using raw pointers and would
161     /// like to avoid converting back to a slice before executing a search.
162     ///
163     /// # Safety
164     ///
165     /// * Both `start` and `end` must be valid for reads.
166     /// * Both `start` and `end` must point to an initialized value.
167     /// * Both `start` and `end` must point to the same allocated object and
168     /// must either be in bounds or at most one byte past the end of the
169     /// allocated object.
170     /// * Both `start` and `end` must be _derived from_ a pointer to the same
171     /// object.
172     /// * The distance between `start` and `end` must not overflow `isize`.
173     /// * The distance being in bounds must not rely on "wrapping around" the
174     /// address space.
175     ///
176     /// Note that callers may pass a pair of pointers such that `start >= end`.
177     /// In that case, `None` will always be returned.
178     #[inline]
find_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>179     pub unsafe fn find_raw(
180         &self,
181         start: *const u8,
182         end: *const u8,
183     ) -> Option<*const u8> {
184         if start >= end {
185             return None;
186         }
187         let len = end.distance(start);
188         if len < __m256i::BYTES {
189             return if len < __m128i::BYTES {
190                 // SAFETY: We require the caller to pass valid start/end
191                 // pointers.
192                 generic::fwd_byte_by_byte(start, end, |b| {
193                     b == self.sse2.needle1()
194                 })
195             } else {
196                 // SAFETY: We require the caller to pass valid start/end
197                 // pointers.
198                 self.find_raw_sse2(start, end)
199             };
200         }
201         // SAFETY: Building a `One` means it's safe to call both 'sse2' and
202         // 'avx2' routines. Also, we've checked that our haystack is big
203         // enough to run on the vector routine. Pointer validity is caller's
204         // responsibility.
205         //
206         // Note that we could call `self.avx2.find_raw` directly here. But that
207         // means we'd have to annotate this routine with `target_feature`.
208         // Which is fine, because this routine is `unsafe` anyway and the
209         // `target_feature` obligation is met by virtue of building a `One`.
210         // The real problem is that a routine with a `target_feature`
211         // annotation generally can't be inlined into caller code unless
212         // the caller code has the same target feature annotations. Namely,
213         // the common case (at time of writing) is for calling code to not
214         // have the `avx2` target feature enabled *at compile time*. Without
215         // `target_feature` on this routine, it can be inlined which will
216         // handle some of the short-haystack cases above without touching the
217         // architecture specific code.
218         self.find_raw_avx2(start, end)
219     }
220 
221     /// Like `rfind`, but accepts and returns raw pointers.
222     ///
223     /// When a match is found, the pointer returned is guaranteed to be
224     /// `>= start` and `< end`.
225     ///
226     /// This routine is useful if you're already using raw pointers and would
227     /// like to avoid converting back to a slice before executing a search.
228     ///
229     /// # Safety
230     ///
231     /// * Both `start` and `end` must be valid for reads.
232     /// * Both `start` and `end` must point to an initialized value.
233     /// * Both `start` and `end` must point to the same allocated object and
234     /// must either be in bounds or at most one byte past the end of the
235     /// allocated object.
236     /// * Both `start` and `end` must be _derived from_ a pointer to the same
237     /// object.
238     /// * The distance between `start` and `end` must not overflow `isize`.
239     /// * The distance being in bounds must not rely on "wrapping around" the
240     /// address space.
241     ///
242     /// Note that callers may pass a pair of pointers such that `start >= end`.
243     /// In that case, `None` will always be returned.
244     #[inline]
rfind_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>245     pub unsafe fn rfind_raw(
246         &self,
247         start: *const u8,
248         end: *const u8,
249     ) -> Option<*const u8> {
250         if start >= end {
251             return None;
252         }
253         let len = end.distance(start);
254         if len < __m256i::BYTES {
255             return if len < __m128i::BYTES {
256                 // SAFETY: We require the caller to pass valid start/end
257                 // pointers.
258                 generic::rev_byte_by_byte(start, end, |b| {
259                     b == self.sse2.needle1()
260                 })
261             } else {
262                 // SAFETY: We require the caller to pass valid start/end
263                 // pointers.
264                 self.rfind_raw_sse2(start, end)
265             };
266         }
267         // SAFETY: Building a `One` means it's safe to call both 'sse2' and
268         // 'avx2' routines. Also, we've checked that our haystack is big
269         // enough to run on the vector routine. Pointer validity is caller's
270         // responsibility.
271         //
272         // See note in forward routine above for why we don't just call
273         // `self.avx2.rfind_raw` directly here.
274         self.rfind_raw_avx2(start, end)
275     }
276 
277     /// Counts all occurrences of this byte in the given haystack represented
278     /// by raw pointers.
279     ///
280     /// This routine is useful if you're already using raw pointers and would
281     /// like to avoid converting back to a slice before executing a search.
282     ///
283     /// # Safety
284     ///
285     /// * Both `start` and `end` must be valid for reads.
286     /// * Both `start` and `end` must point to an initialized value.
287     /// * Both `start` and `end` must point to the same allocated object and
288     /// must either be in bounds or at most one byte past the end of the
289     /// allocated object.
290     /// * Both `start` and `end` must be _derived from_ a pointer to the same
291     /// object.
292     /// * The distance between `start` and `end` must not overflow `isize`.
293     /// * The distance being in bounds must not rely on "wrapping around" the
294     /// address space.
295     ///
296     /// Note that callers may pass a pair of pointers such that `start >= end`.
297     /// In that case, `0` will always be returned.
298     #[inline]
count_raw(&self, start: *const u8, end: *const u8) -> usize299     pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize {
300         if start >= end {
301             return 0;
302         }
303         let len = end.distance(start);
304         if len < __m256i::BYTES {
305             return if len < __m128i::BYTES {
306                 // SAFETY: We require the caller to pass valid start/end
307                 // pointers.
308                 generic::count_byte_by_byte(start, end, |b| {
309                     b == self.sse2.needle1()
310                 })
311             } else {
312                 // SAFETY: We require the caller to pass valid start/end
313                 // pointers.
314                 self.count_raw_sse2(start, end)
315             };
316         }
317         // SAFETY: Building a `One` means it's safe to call both 'sse2' and
318         // 'avx2' routines. Also, we've checked that our haystack is big
319         // enough to run on the vector routine. Pointer validity is caller's
320         // responsibility.
321         self.count_raw_avx2(start, end)
322     }
323 
324     /// Execute a search using SSE2 vectors and routines.
325     ///
326     /// # Safety
327     ///
328     /// Same as [`One::find_raw`], except the distance between `start` and
329     /// `end` must be at least the size of an SSE2 vector (in bytes).
330     ///
331     /// (The target feature safety obligation is automatically fulfilled by
332     /// virtue of being a method on `One`, which can only be constructed
333     /// when it is safe to call `sse2`/`avx2` routines.)
334     #[target_feature(enable = "sse2")]
335     #[inline]
find_raw_sse2( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>336     unsafe fn find_raw_sse2(
337         &self,
338         start: *const u8,
339         end: *const u8,
340     ) -> Option<*const u8> {
341         self.sse2.find_raw(start, end)
342     }
343 
344     /// Execute a search using SSE2 vectors and routines.
345     ///
346     /// # Safety
347     ///
348     /// Same as [`One::rfind_raw`], except the distance between `start` and
349     /// `end` must be at least the size of an SSE2 vector (in bytes).
350     ///
351     /// (The target feature safety obligation is automatically fulfilled by
352     /// virtue of being a method on `One`, which can only be constructed
353     /// when it is safe to call `sse2`/`avx2` routines.)
354     #[target_feature(enable = "sse2")]
355     #[inline]
rfind_raw_sse2( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>356     unsafe fn rfind_raw_sse2(
357         &self,
358         start: *const u8,
359         end: *const u8,
360     ) -> Option<*const u8> {
361         self.sse2.rfind_raw(start, end)
362     }
363 
364     /// Execute a count using SSE2 vectors and routines.
365     ///
366     /// # Safety
367     ///
368     /// Same as [`One::count_raw`], except the distance between `start` and
369     /// `end` must be at least the size of an SSE2 vector (in bytes).
370     ///
371     /// (The target feature safety obligation is automatically fulfilled by
372     /// virtue of being a method on `One`, which can only be constructed
373     /// when it is safe to call `sse2`/`avx2` routines.)
374     #[target_feature(enable = "sse2")]
375     #[inline]
count_raw_sse2( &self, start: *const u8, end: *const u8, ) -> usize376     unsafe fn count_raw_sse2(
377         &self,
378         start: *const u8,
379         end: *const u8,
380     ) -> usize {
381         self.sse2.count_raw(start, end)
382     }
383 
384     /// Execute a search using AVX2 vectors and routines.
385     ///
386     /// # Safety
387     ///
388     /// Same as [`One::find_raw`], except the distance between `start` and
389     /// `end` must be at least the size of an AVX2 vector (in bytes).
390     ///
391     /// (The target feature safety obligation is automatically fulfilled by
392     /// virtue of being a method on `One`, which can only be constructed
393     /// when it is safe to call `sse2`/`avx2` routines.)
394     #[target_feature(enable = "avx2")]
395     #[inline]
find_raw_avx2( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>396     unsafe fn find_raw_avx2(
397         &self,
398         start: *const u8,
399         end: *const u8,
400     ) -> Option<*const u8> {
401         self.avx2.find_raw(start, end)
402     }
403 
404     /// Execute a search using AVX2 vectors and routines.
405     ///
406     /// # Safety
407     ///
408     /// Same as [`One::rfind_raw`], except the distance between `start` and
409     /// `end` must be at least the size of an AVX2 vector (in bytes).
410     ///
411     /// (The target feature safety obligation is automatically fulfilled by
412     /// virtue of being a method on `One`, which can only be constructed
413     /// when it is safe to call `sse2`/`avx2` routines.)
414     #[target_feature(enable = "avx2")]
415     #[inline]
rfind_raw_avx2( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>416     unsafe fn rfind_raw_avx2(
417         &self,
418         start: *const u8,
419         end: *const u8,
420     ) -> Option<*const u8> {
421         self.avx2.rfind_raw(start, end)
422     }
423 
424     /// Execute a count using AVX2 vectors and routines.
425     ///
426     /// # Safety
427     ///
428     /// Same as [`One::count_raw`], except the distance between `start` and
429     /// `end` must be at least the size of an AVX2 vector (in bytes).
430     ///
431     /// (The target feature safety obligation is automatically fulfilled by
432     /// virtue of being a method on `One`, which can only be constructed
433     /// when it is safe to call `sse2`/`avx2` routines.)
434     #[target_feature(enable = "avx2")]
435     #[inline]
count_raw_avx2( &self, start: *const u8, end: *const u8, ) -> usize436     unsafe fn count_raw_avx2(
437         &self,
438         start: *const u8,
439         end: *const u8,
440     ) -> usize {
441         self.avx2.count_raw(start, end)
442     }
443 
444     /// Returns an iterator over all occurrences of the needle byte in the
445     /// given haystack.
446     ///
447     /// The iterator returned implements `DoubleEndedIterator`. This means it
448     /// can also be used to find occurrences in reverse order.
449     #[inline]
iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h>450     pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> {
451         OneIter { searcher: self, it: generic::Iter::new(haystack) }
452     }
453 }
454 
455 /// An iterator over all occurrences of a single byte in a haystack.
456 ///
457 /// This iterator implements `DoubleEndedIterator`, which means it can also be
458 /// used to find occurrences in reverse order.
459 ///
460 /// This iterator is created by the [`One::iter`] method.
461 ///
462 /// The lifetime parameters are as follows:
463 ///
464 /// * `'a` refers to the lifetime of the underlying [`One`] searcher.
465 /// * `'h` refers to the lifetime of the haystack being searched.
466 #[derive(Clone, Debug)]
467 pub struct OneIter<'a, 'h> {
468     searcher: &'a One,
469     it: generic::Iter<'h>,
470 }
471 
472 impl<'a, 'h> Iterator for OneIter<'a, 'h> {
473     type Item = usize;
474 
475     #[inline]
next(&mut self) -> Option<usize>476     fn next(&mut self) -> Option<usize> {
477         // SAFETY: We rely on the generic iterator to provide valid start
478         // and end pointers, but we guarantee that any pointer returned by
479         // 'find_raw' falls within the bounds of the start and end pointer.
480         unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
481     }
482 
483     #[inline]
count(self) -> usize484     fn count(self) -> usize {
485         self.it.count(|s, e| {
486             // SAFETY: We rely on our generic iterator to return valid start
487             // and end pointers.
488             unsafe { self.searcher.count_raw(s, e) }
489         })
490     }
491 
492     #[inline]
size_hint(&self) -> (usize, Option<usize>)493     fn size_hint(&self) -> (usize, Option<usize>) {
494         self.it.size_hint()
495     }
496 }
497 
498 impl<'a, 'h> DoubleEndedIterator for OneIter<'a, 'h> {
499     #[inline]
next_back(&mut self) -> Option<usize>500     fn next_back(&mut self) -> Option<usize> {
501         // SAFETY: We rely on the generic iterator to provide valid start
502         // and end pointers, but we guarantee that any pointer returned by
503         // 'rfind_raw' falls within the bounds of the start and end pointer.
504         unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
505     }
506 }
507 
508 impl<'a, 'h> core::iter::FusedIterator for OneIter<'a, 'h> {}
509 
510 /// Finds all occurrences of two bytes in a haystack.
511 ///
512 /// That is, this reports matches of one of two possible bytes. For example,
513 /// searching for `a` or `b` in `afoobar` would report matches at offsets `0`,
514 /// `4` and `5`.
515 #[derive(Clone, Copy, Debug)]
516 pub struct Two {
517     /// Used for haystacks less than 32 bytes.
518     sse2: generic::Two<__m128i>,
519     /// Used for haystacks bigger than 32 bytes.
520     avx2: generic::Two<__m256i>,
521 }
522 
523 impl Two {
524     /// Create a new searcher that finds occurrences of the needle bytes given.
525     ///
526     /// This particular searcher is specialized to use AVX2 vector instructions
527     /// that typically make it quite fast. (SSE2 is used for haystacks that
528     /// are too short to accommodate an AVX2 vector.)
529     ///
530     /// If either SSE2 or AVX2 is unavailable in the current environment, then
531     /// `None` is returned.
532     #[inline]
new(needle1: u8, needle2: u8) -> Option<Two>533     pub fn new(needle1: u8, needle2: u8) -> Option<Two> {
534         if Two::is_available() {
535             // SAFETY: we check that sse2 and avx2 are available above.
536             unsafe { Some(Two::new_unchecked(needle1, needle2)) }
537         } else {
538             None
539         }
540     }
541 
542     /// Create a new finder specific to AVX2 vectors and routines without
543     /// checking that either SSE2 or AVX2 is available.
544     ///
545     /// # Safety
546     ///
547     /// Callers must guarantee that it is safe to execute both `sse2` and
548     /// `avx2` instructions in the current environment.
549     ///
550     /// Note that it is a common misconception that if one compiles for an
551     /// `x86_64` target, then they therefore automatically have access to SSE2
552     /// instructions. While this is almost always the case, it isn't true in
553     /// 100% of cases.
554     #[target_feature(enable = "sse2", enable = "avx2")]
555     #[inline]
new_unchecked(needle1: u8, needle2: u8) -> Two556     pub unsafe fn new_unchecked(needle1: u8, needle2: u8) -> Two {
557         Two {
558             sse2: generic::Two::new(needle1, needle2),
559             avx2: generic::Two::new(needle1, needle2),
560         }
561     }
562 
563     /// Returns true when this implementation is available in the current
564     /// environment.
565     ///
566     /// When this is true, it is guaranteed that [`Two::new`] will return
567     /// a `Some` value. Similarly, when it is false, it is guaranteed that
568     /// `Two::new` will return a `None` value.
569     ///
570     /// Note also that for the lifetime of a single program, if this returns
571     /// true then it will always return true.
572     #[inline]
is_available() -> bool573     pub fn is_available() -> bool {
574         #[cfg(not(target_feature = "sse2"))]
575         {
576             false
577         }
578         #[cfg(target_feature = "sse2")]
579         {
580             #[cfg(target_feature = "avx2")]
581             {
582                 true
583             }
584             #[cfg(not(target_feature = "avx2"))]
585             {
586                 #[cfg(feature = "std")]
587                 {
588                     std::is_x86_feature_detected!("avx2")
589                 }
590                 #[cfg(not(feature = "std"))]
591                 {
592                     false
593                 }
594             }
595         }
596     }
597 
598     /// Return the first occurrence of one of the needle bytes in the given
599     /// haystack. If no such occurrence exists, then `None` is returned.
600     ///
601     /// The occurrence is reported as an offset into `haystack`. Its maximum
602     /// value is `haystack.len() - 1`.
603     #[inline]
find(&self, haystack: &[u8]) -> Option<usize>604     pub fn find(&self, haystack: &[u8]) -> Option<usize> {
605         // SAFETY: `find_raw` guarantees that if a pointer is returned, it
606         // falls within the bounds of the start and end pointers.
607         unsafe {
608             generic::search_slice_with_raw(haystack, |s, e| {
609                 self.find_raw(s, e)
610             })
611         }
612     }
613 
614     /// Return the last occurrence of one of the needle bytes in the given
615     /// haystack. If no such occurrence exists, then `None` is returned.
616     ///
617     /// The occurrence is reported as an offset into `haystack`. Its maximum
618     /// value is `haystack.len() - 1`.
619     #[inline]
rfind(&self, haystack: &[u8]) -> Option<usize>620     pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
621         // SAFETY: `find_raw` guarantees that if a pointer is returned, it
622         // falls within the bounds of the start and end pointers.
623         unsafe {
624             generic::search_slice_with_raw(haystack, |s, e| {
625                 self.rfind_raw(s, e)
626             })
627         }
628     }
629 
630     /// Like `find`, but accepts and returns raw pointers.
631     ///
632     /// When a match is found, the pointer returned is guaranteed to be
633     /// `>= start` and `< end`.
634     ///
635     /// This routine is useful if you're already using raw pointers and would
636     /// like to avoid converting back to a slice before executing a search.
637     ///
638     /// # Safety
639     ///
640     /// * Both `start` and `end` must be valid for reads.
641     /// * Both `start` and `end` must point to an initialized value.
642     /// * Both `start` and `end` must point to the same allocated object and
643     /// must either be in bounds or at most one byte past the end of the
644     /// allocated object.
645     /// * Both `start` and `end` must be _derived from_ a pointer to the same
646     /// object.
647     /// * The distance between `start` and `end` must not overflow `isize`.
648     /// * The distance being in bounds must not rely on "wrapping around" the
649     /// address space.
650     ///
651     /// Note that callers may pass a pair of pointers such that `start >= end`.
652     /// In that case, `None` will always be returned.
653     #[inline]
find_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>654     pub unsafe fn find_raw(
655         &self,
656         start: *const u8,
657         end: *const u8,
658     ) -> Option<*const u8> {
659         if start >= end {
660             return None;
661         }
662         let len = end.distance(start);
663         if len < __m256i::BYTES {
664             return if len < __m128i::BYTES {
665                 // SAFETY: We require the caller to pass valid start/end
666                 // pointers.
667                 generic::fwd_byte_by_byte(start, end, |b| {
668                     b == self.sse2.needle1() || b == self.sse2.needle2()
669                 })
670             } else {
671                 // SAFETY: We require the caller to pass valid start/end
672                 // pointers.
673                 self.find_raw_sse2(start, end)
674             };
675         }
676         // SAFETY: Building a `Two` means it's safe to call both 'sse2' and
677         // 'avx2' routines. Also, we've checked that our haystack is big
678         // enough to run on the vector routine. Pointer validity is caller's
679         // responsibility.
680         //
681         // Note that we could call `self.avx2.find_raw` directly here. But that
682         // means we'd have to annotate this routine with `target_feature`.
683         // Which is fine, because this routine is `unsafe` anyway and the
684         // `target_feature` obligation is met by virtue of building a `Two`.
685         // The real problem is that a routine with a `target_feature`
686         // annotation generally can't be inlined into caller code unless
687         // the caller code has the same target feature annotations. Namely,
688         // the common case (at time of writing) is for calling code to not
689         // have the `avx2` target feature enabled *at compile time*. Without
690         // `target_feature` on this routine, it can be inlined which will
691         // handle some of the short-haystack cases above without touching the
692         // architecture specific code.
693         self.find_raw_avx2(start, end)
694     }
695 
696     /// Like `rfind`, but accepts and returns raw pointers.
697     ///
698     /// When a match is found, the pointer returned is guaranteed to be
699     /// `>= start` and `< end`.
700     ///
701     /// This routine is useful if you're already using raw pointers and would
702     /// like to avoid converting back to a slice before executing a search.
703     ///
704     /// # Safety
705     ///
706     /// * Both `start` and `end` must be valid for reads.
707     /// * Both `start` and `end` must point to an initialized value.
708     /// * Both `start` and `end` must point to the same allocated object and
709     /// must either be in bounds or at most one byte past the end of the
710     /// allocated object.
711     /// * Both `start` and `end` must be _derived from_ a pointer to the same
712     /// object.
713     /// * The distance between `start` and `end` must not overflow `isize`.
714     /// * The distance being in bounds must not rely on "wrapping around" the
715     /// address space.
716     ///
717     /// Note that callers may pass a pair of pointers such that `start >= end`.
718     /// In that case, `None` will always be returned.
719     #[inline]
rfind_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>720     pub unsafe fn rfind_raw(
721         &self,
722         start: *const u8,
723         end: *const u8,
724     ) -> Option<*const u8> {
725         if start >= end {
726             return None;
727         }
728         let len = end.distance(start);
729         if len < __m256i::BYTES {
730             return if len < __m128i::BYTES {
731                 // SAFETY: We require the caller to pass valid start/end
732                 // pointers.
733                 generic::rev_byte_by_byte(start, end, |b| {
734                     b == self.sse2.needle1() || b == self.sse2.needle2()
735                 })
736             } else {
737                 // SAFETY: We require the caller to pass valid start/end
738                 // pointers.
739                 self.rfind_raw_sse2(start, end)
740             };
741         }
742         // SAFETY: Building a `Two` means it's safe to call both 'sse2' and
743         // 'avx2' routines. Also, we've checked that our haystack is big
744         // enough to run on the vector routine. Pointer validity is caller's
745         // responsibility.
746         //
747         // See note in forward routine above for why we don't just call
748         // `self.avx2.rfind_raw` directly here.
749         self.rfind_raw_avx2(start, end)
750     }
751 
752     /// Execute a search using SSE2 vectors and routines.
753     ///
754     /// # Safety
755     ///
756     /// Same as [`Two::find_raw`], except the distance between `start` and
757     /// `end` must be at least the size of an SSE2 vector (in bytes).
758     ///
759     /// (The target feature safety obligation is automatically fulfilled by
760     /// virtue of being a method on `Two`, which can only be constructed
761     /// when it is safe to call `sse2`/`avx2` routines.)
762     #[target_feature(enable = "sse2")]
763     #[inline]
find_raw_sse2( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>764     unsafe fn find_raw_sse2(
765         &self,
766         start: *const u8,
767         end: *const u8,
768     ) -> Option<*const u8> {
769         self.sse2.find_raw(start, end)
770     }
771 
772     /// Execute a search using SSE2 vectors and routines.
773     ///
774     /// # Safety
775     ///
776     /// Same as [`Two::rfind_raw`], except the distance between `start` and
777     /// `end` must be at least the size of an SSE2 vector (in bytes).
778     ///
779     /// (The target feature safety obligation is automatically fulfilled by
780     /// virtue of being a method on `Two`, which can only be constructed
781     /// when it is safe to call `sse2`/`avx2` routines.)
782     #[target_feature(enable = "sse2")]
783     #[inline]
rfind_raw_sse2( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>784     unsafe fn rfind_raw_sse2(
785         &self,
786         start: *const u8,
787         end: *const u8,
788     ) -> Option<*const u8> {
789         self.sse2.rfind_raw(start, end)
790     }
791 
792     /// Execute a search using AVX2 vectors and routines.
793     ///
794     /// # Safety
795     ///
796     /// Same as [`Two::find_raw`], except the distance between `start` and
797     /// `end` must be at least the size of an AVX2 vector (in bytes).
798     ///
799     /// (The target feature safety obligation is automatically fulfilled by
800     /// virtue of being a method on `Two`, which can only be constructed
801     /// when it is safe to call `sse2`/`avx2` routines.)
802     #[target_feature(enable = "avx2")]
803     #[inline]
find_raw_avx2( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>804     unsafe fn find_raw_avx2(
805         &self,
806         start: *const u8,
807         end: *const u8,
808     ) -> Option<*const u8> {
809         self.avx2.find_raw(start, end)
810     }
811 
812     /// Execute a search using AVX2 vectors and routines.
813     ///
814     /// # Safety
815     ///
816     /// Same as [`Two::rfind_raw`], except the distance between `start` and
817     /// `end` must be at least the size of an AVX2 vector (in bytes).
818     ///
819     /// (The target feature safety obligation is automatically fulfilled by
820     /// virtue of being a method on `Two`, which can only be constructed
821     /// when it is safe to call `sse2`/`avx2` routines.)
822     #[target_feature(enable = "avx2")]
823     #[inline]
rfind_raw_avx2( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>824     unsafe fn rfind_raw_avx2(
825         &self,
826         start: *const u8,
827         end: *const u8,
828     ) -> Option<*const u8> {
829         self.avx2.rfind_raw(start, end)
830     }
831 
832     /// Returns an iterator over all occurrences of the needle bytes in the
833     /// given haystack.
834     ///
835     /// The iterator returned implements `DoubleEndedIterator`. This means it
836     /// can also be used to find occurrences in reverse order.
837     #[inline]
iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h>838     pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> {
839         TwoIter { searcher: self, it: generic::Iter::new(haystack) }
840     }
841 }
842 
843 /// An iterator over all occurrences of two possible bytes in a haystack.
844 ///
845 /// This iterator implements `DoubleEndedIterator`, which means it can also be
846 /// used to find occurrences in reverse order.
847 ///
848 /// This iterator is created by the [`Two::iter`] method.
849 ///
850 /// The lifetime parameters are as follows:
851 ///
852 /// * `'a` refers to the lifetime of the underlying [`Two`] searcher.
853 /// * `'h` refers to the lifetime of the haystack being searched.
854 #[derive(Clone, Debug)]
855 pub struct TwoIter<'a, 'h> {
856     searcher: &'a Two,
857     it: generic::Iter<'h>,
858 }
859 
860 impl<'a, 'h> Iterator for TwoIter<'a, 'h> {
861     type Item = usize;
862 
863     #[inline]
next(&mut self) -> Option<usize>864     fn next(&mut self) -> Option<usize> {
865         // SAFETY: We rely on the generic iterator to provide valid start
866         // and end pointers, but we guarantee that any pointer returned by
867         // 'find_raw' falls within the bounds of the start and end pointer.
868         unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
869     }
870 
871     #[inline]
size_hint(&self) -> (usize, Option<usize>)872     fn size_hint(&self) -> (usize, Option<usize>) {
873         self.it.size_hint()
874     }
875 }
876 
877 impl<'a, 'h> DoubleEndedIterator for TwoIter<'a, 'h> {
878     #[inline]
next_back(&mut self) -> Option<usize>879     fn next_back(&mut self) -> Option<usize> {
880         // SAFETY: We rely on the generic iterator to provide valid start
881         // and end pointers, but we guarantee that any pointer returned by
882         // 'rfind_raw' falls within the bounds of the start and end pointer.
883         unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
884     }
885 }
886 
887 impl<'a, 'h> core::iter::FusedIterator for TwoIter<'a, 'h> {}
888 
889 /// Finds all occurrences of three bytes in a haystack.
890 ///
891 /// That is, this reports matches of one of three possible bytes. For example,
892 /// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets
893 /// `0`, `2`, `3`, `4` and `5`.
894 #[derive(Clone, Copy, Debug)]
895 pub struct Three {
896     /// Used for haystacks less than 32 bytes.
897     sse2: generic::Three<__m128i>,
898     /// Used for haystacks bigger than 32 bytes.
899     avx2: generic::Three<__m256i>,
900 }
901 
902 impl Three {
903     /// Create a new searcher that finds occurrences of the needle bytes given.
904     ///
905     /// This particular searcher is specialized to use AVX2 vector instructions
906     /// that typically make it quite fast. (SSE2 is used for haystacks that
907     /// are too short to accommodate an AVX2 vector.)
908     ///
909     /// If either SSE2 or AVX2 is unavailable in the current environment, then
910     /// `None` is returned.
911     #[inline]
new(needle1: u8, needle2: u8, needle3: u8) -> Option<Three>912     pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Option<Three> {
913         if Three::is_available() {
914             // SAFETY: we check that sse2 and avx2 are available above.
915             unsafe { Some(Three::new_unchecked(needle1, needle2, needle3)) }
916         } else {
917             None
918         }
919     }
920 
921     /// Create a new finder specific to AVX2 vectors and routines without
922     /// checking that either SSE2 or AVX2 is available.
923     ///
924     /// # Safety
925     ///
926     /// Callers must guarantee that it is safe to execute both `sse2` and
927     /// `avx2` instructions in the current environment.
928     ///
929     /// Note that it is a common misconception that if one compiles for an
930     /// `x86_64` target, then they therefore automatically have access to SSE2
931     /// instructions. While this is almost always the case, it isn't true in
932     /// 100% of cases.
933     #[target_feature(enable = "sse2", enable = "avx2")]
934     #[inline]
new_unchecked( needle1: u8, needle2: u8, needle3: u8, ) -> Three935     pub unsafe fn new_unchecked(
936         needle1: u8,
937         needle2: u8,
938         needle3: u8,
939     ) -> Three {
940         Three {
941             sse2: generic::Three::new(needle1, needle2, needle3),
942             avx2: generic::Three::new(needle1, needle2, needle3),
943         }
944     }
945 
946     /// Returns true when this implementation is available in the current
947     /// environment.
948     ///
949     /// When this is true, it is guaranteed that [`Three::new`] will return
950     /// a `Some` value. Similarly, when it is false, it is guaranteed that
951     /// `Three::new` will return a `None` value.
952     ///
953     /// Note also that for the lifetime of a single program, if this returns
954     /// true then it will always return true.
955     #[inline]
is_available() -> bool956     pub fn is_available() -> bool {
957         #[cfg(not(target_feature = "sse2"))]
958         {
959             false
960         }
961         #[cfg(target_feature = "sse2")]
962         {
963             #[cfg(target_feature = "avx2")]
964             {
965                 true
966             }
967             #[cfg(not(target_feature = "avx2"))]
968             {
969                 #[cfg(feature = "std")]
970                 {
971                     std::is_x86_feature_detected!("avx2")
972                 }
973                 #[cfg(not(feature = "std"))]
974                 {
975                     false
976                 }
977             }
978         }
979     }
980 
981     /// Return the first occurrence of one of the needle bytes in the given
982     /// haystack. If no such occurrence exists, then `None` is returned.
983     ///
984     /// The occurrence is reported as an offset into `haystack`. Its maximum
985     /// value is `haystack.len() - 1`.
986     #[inline]
find(&self, haystack: &[u8]) -> Option<usize>987     pub fn find(&self, haystack: &[u8]) -> Option<usize> {
988         // SAFETY: `find_raw` guarantees that if a pointer is returned, it
989         // falls within the bounds of the start and end pointers.
990         unsafe {
991             generic::search_slice_with_raw(haystack, |s, e| {
992                 self.find_raw(s, e)
993             })
994         }
995     }
996 
997     /// Return the last occurrence of one of the needle bytes in the given
998     /// haystack. If no such occurrence exists, then `None` is returned.
999     ///
1000     /// The occurrence is reported as an offset into `haystack`. Its maximum
1001     /// value is `haystack.len() - 1`.
1002     #[inline]
rfind(&self, haystack: &[u8]) -> Option<usize>1003     pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
1004         // SAFETY: `find_raw` guarantees that if a pointer is returned, it
1005         // falls within the bounds of the start and end pointers.
1006         unsafe {
1007             generic::search_slice_with_raw(haystack, |s, e| {
1008                 self.rfind_raw(s, e)
1009             })
1010         }
1011     }
1012 
1013     /// Like `find`, but accepts and returns raw pointers.
1014     ///
1015     /// When a match is found, the pointer returned is guaranteed to be
1016     /// `>= start` and `< end`.
1017     ///
1018     /// This routine is useful if you're already using raw pointers and would
1019     /// like to avoid converting back to a slice before executing a search.
1020     ///
1021     /// # Safety
1022     ///
1023     /// * Both `start` and `end` must be valid for reads.
1024     /// * Both `start` and `end` must point to an initialized value.
1025     /// * Both `start` and `end` must point to the same allocated object and
1026     /// must either be in bounds or at most one byte past the end of the
1027     /// allocated object.
1028     /// * Both `start` and `end` must be _derived from_ a pointer to the same
1029     /// object.
1030     /// * The distance between `start` and `end` must not overflow `isize`.
1031     /// * The distance being in bounds must not rely on "wrapping around" the
1032     /// address space.
1033     ///
1034     /// Note that callers may pass a pair of pointers such that `start >= end`.
1035     /// In that case, `None` will always be returned.
1036     #[inline]
find_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>1037     pub unsafe fn find_raw(
1038         &self,
1039         start: *const u8,
1040         end: *const u8,
1041     ) -> Option<*const u8> {
1042         if start >= end {
1043             return None;
1044         }
1045         let len = end.distance(start);
1046         if len < __m256i::BYTES {
1047             return if len < __m128i::BYTES {
1048                 // SAFETY: We require the caller to pass valid start/end
1049                 // pointers.
1050                 generic::fwd_byte_by_byte(start, end, |b| {
1051                     b == self.sse2.needle1()
1052                         || b == self.sse2.needle2()
1053                         || b == self.sse2.needle3()
1054                 })
1055             } else {
1056                 // SAFETY: We require the caller to pass valid start/end
1057                 // pointers.
1058                 self.find_raw_sse2(start, end)
1059             };
1060         }
1061         // SAFETY: Building a `Three` means it's safe to call both 'sse2' and
1062         // 'avx2' routines. Also, we've checked that our haystack is big
1063         // enough to run on the vector routine. Pointer validity is caller's
1064         // responsibility.
1065         //
1066         // Note that we could call `self.avx2.find_raw` directly here. But that
1067         // means we'd have to annotate this routine with `target_feature`.
1068         // Which is fine, because this routine is `unsafe` anyway and the
1069         // `target_feature` obligation is met by virtue of building a `Three`.
1070         // The real problem is that a routine with a `target_feature`
1071         // annotation generally can't be inlined into caller code unless
1072         // the caller code has the same target feature annotations. Namely,
1073         // the common case (at time of writing) is for calling code to not
1074         // have the `avx2` target feature enabled *at compile time*. Without
1075         // `target_feature` on this routine, it can be inlined which will
1076         // handle some of the short-haystack cases above without touching the
1077         // architecture specific code.
1078         self.find_raw_avx2(start, end)
1079     }
1080 
1081     /// Like `rfind`, but accepts and returns raw pointers.
1082     ///
1083     /// When a match is found, the pointer returned is guaranteed to be
1084     /// `>= start` and `< end`.
1085     ///
1086     /// This routine is useful if you're already using raw pointers and would
1087     /// like to avoid converting back to a slice before executing a search.
1088     ///
1089     /// # Safety
1090     ///
1091     /// * Both `start` and `end` must be valid for reads.
1092     /// * Both `start` and `end` must point to an initialized value.
1093     /// * Both `start` and `end` must point to the same allocated object and
1094     /// must either be in bounds or at most one byte past the end of the
1095     /// allocated object.
1096     /// * Both `start` and `end` must be _derived from_ a pointer to the same
1097     /// object.
1098     /// * The distance between `start` and `end` must not overflow `isize`.
1099     /// * The distance being in bounds must not rely on "wrapping around" the
1100     /// address space.
1101     ///
1102     /// Note that callers may pass a pair of pointers such that `start >= end`.
1103     /// In that case, `None` will always be returned.
1104     #[inline]
rfind_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>1105     pub unsafe fn rfind_raw(
1106         &self,
1107         start: *const u8,
1108         end: *const u8,
1109     ) -> Option<*const u8> {
1110         if start >= end {
1111             return None;
1112         }
1113         let len = end.distance(start);
1114         if len < __m256i::BYTES {
1115             return if len < __m128i::BYTES {
1116                 // SAFETY: We require the caller to pass valid start/end
1117                 // pointers.
1118                 generic::rev_byte_by_byte(start, end, |b| {
1119                     b == self.sse2.needle1()
1120                         || b == self.sse2.needle2()
1121                         || b == self.sse2.needle3()
1122                 })
1123             } else {
1124                 // SAFETY: We require the caller to pass valid start/end
1125                 // pointers.
1126                 self.rfind_raw_sse2(start, end)
1127             };
1128         }
1129         // SAFETY: Building a `Three` means it's safe to call both 'sse2' and
1130         // 'avx2' routines. Also, we've checked that our haystack is big
1131         // enough to run on the vector routine. Pointer validity is caller's
1132         // responsibility.
1133         //
1134         // See note in forward routine above for why we don't just call
1135         // `self.avx2.rfind_raw` directly here.
1136         self.rfind_raw_avx2(start, end)
1137     }
1138 
1139     /// Execute a search using SSE2 vectors and routines.
1140     ///
1141     /// # Safety
1142     ///
1143     /// Same as [`Three::find_raw`], except the distance between `start` and
1144     /// `end` must be at least the size of an SSE2 vector (in bytes).
1145     ///
1146     /// (The target feature safety obligation is automatically fulfilled by
1147     /// virtue of being a method on `Three`, which can only be constructed
1148     /// when it is safe to call `sse2`/`avx2` routines.)
1149     #[target_feature(enable = "sse2")]
1150     #[inline]
find_raw_sse2( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>1151     unsafe fn find_raw_sse2(
1152         &self,
1153         start: *const u8,
1154         end: *const u8,
1155     ) -> Option<*const u8> {
1156         self.sse2.find_raw(start, end)
1157     }
1158 
1159     /// Execute a search using SSE2 vectors and routines.
1160     ///
1161     /// # Safety
1162     ///
1163     /// Same as [`Three::rfind_raw`], except the distance between `start` and
1164     /// `end` must be at least the size of an SSE2 vector (in bytes).
1165     ///
1166     /// (The target feature safety obligation is automatically fulfilled by
1167     /// virtue of being a method on `Three`, which can only be constructed
1168     /// when it is safe to call `sse2`/`avx2` routines.)
1169     #[target_feature(enable = "sse2")]
1170     #[inline]
rfind_raw_sse2( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>1171     unsafe fn rfind_raw_sse2(
1172         &self,
1173         start: *const u8,
1174         end: *const u8,
1175     ) -> Option<*const u8> {
1176         self.sse2.rfind_raw(start, end)
1177     }
1178 
1179     /// Execute a search using AVX2 vectors and routines.
1180     ///
1181     /// # Safety
1182     ///
1183     /// Same as [`Three::find_raw`], except the distance between `start` and
1184     /// `end` must be at least the size of an AVX2 vector (in bytes).
1185     ///
1186     /// (The target feature safety obligation is automatically fulfilled by
1187     /// virtue of being a method on `Three`, which can only be constructed
1188     /// when it is safe to call `sse2`/`avx2` routines.)
1189     #[target_feature(enable = "avx2")]
1190     #[inline]
find_raw_avx2( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>1191     unsafe fn find_raw_avx2(
1192         &self,
1193         start: *const u8,
1194         end: *const u8,
1195     ) -> Option<*const u8> {
1196         self.avx2.find_raw(start, end)
1197     }
1198 
1199     /// Execute a search using AVX2 vectors and routines.
1200     ///
1201     /// # Safety
1202     ///
1203     /// Same as [`Three::rfind_raw`], except the distance between `start` and
1204     /// `end` must be at least the size of an AVX2 vector (in bytes).
1205     ///
1206     /// (The target feature safety obligation is automatically fulfilled by
1207     /// virtue of being a method on `Three`, which can only be constructed
1208     /// when it is safe to call `sse2`/`avx2` routines.)
1209     #[target_feature(enable = "avx2")]
1210     #[inline]
rfind_raw_avx2( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>1211     unsafe fn rfind_raw_avx2(
1212         &self,
1213         start: *const u8,
1214         end: *const u8,
1215     ) -> Option<*const u8> {
1216         self.avx2.rfind_raw(start, end)
1217     }
1218 
1219     /// Returns an iterator over all occurrences of the needle bytes in the
1220     /// given haystack.
1221     ///
1222     /// The iterator returned implements `DoubleEndedIterator`. This means it
1223     /// can also be used to find occurrences in reverse order.
1224     #[inline]
iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h>1225     pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> {
1226         ThreeIter { searcher: self, it: generic::Iter::new(haystack) }
1227     }
1228 }
1229 
1230 /// An iterator over all occurrences of three possible bytes in a haystack.
1231 ///
1232 /// This iterator implements `DoubleEndedIterator`, which means it can also be
1233 /// used to find occurrences in reverse order.
1234 ///
1235 /// This iterator is created by the [`Three::iter`] method.
1236 ///
1237 /// The lifetime parameters are as follows:
1238 ///
1239 /// * `'a` refers to the lifetime of the underlying [`Three`] searcher.
1240 /// * `'h` refers to the lifetime of the haystack being searched.
1241 #[derive(Clone, Debug)]
1242 pub struct ThreeIter<'a, 'h> {
1243     searcher: &'a Three,
1244     it: generic::Iter<'h>,
1245 }
1246 
1247 impl<'a, 'h> Iterator for ThreeIter<'a, 'h> {
1248     type Item = usize;
1249 
1250     #[inline]
next(&mut self) -> Option<usize>1251     fn next(&mut self) -> Option<usize> {
1252         // SAFETY: We rely on the generic iterator to provide valid start
1253         // and end pointers, but we guarantee that any pointer returned by
1254         // 'find_raw' falls within the bounds of the start and end pointer.
1255         unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
1256     }
1257 
1258     #[inline]
size_hint(&self) -> (usize, Option<usize>)1259     fn size_hint(&self) -> (usize, Option<usize>) {
1260         self.it.size_hint()
1261     }
1262 }
1263 
1264 impl<'a, 'h> DoubleEndedIterator for ThreeIter<'a, 'h> {
1265     #[inline]
next_back(&mut self) -> Option<usize>1266     fn next_back(&mut self) -> Option<usize> {
1267         // SAFETY: We rely on the generic iterator to provide valid start
1268         // and end pointers, but we guarantee that any pointer returned by
1269         // 'rfind_raw' falls within the bounds of the start and end pointer.
1270         unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
1271     }
1272 }
1273 
1274 impl<'a, 'h> core::iter::FusedIterator for ThreeIter<'a, 'h> {}
1275 
1276 #[cfg(test)]
1277 mod tests {
1278     use super::*;
1279 
1280     define_memchr_quickcheck!(super);
1281 
1282     #[test]
forward_one()1283     fn forward_one() {
1284         crate::tests::memchr::Runner::new(1).forward_iter(
1285             |haystack, needles| {
1286                 Some(One::new(needles[0])?.iter(haystack).collect())
1287             },
1288         )
1289     }
1290 
1291     #[test]
reverse_one()1292     fn reverse_one() {
1293         crate::tests::memchr::Runner::new(1).reverse_iter(
1294             |haystack, needles| {
1295                 Some(One::new(needles[0])?.iter(haystack).rev().collect())
1296             },
1297         )
1298     }
1299 
1300     #[test]
count_one()1301     fn count_one() {
1302         crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| {
1303             Some(One::new(needles[0])?.iter(haystack).count())
1304         })
1305     }
1306 
1307     #[test]
forward_two()1308     fn forward_two() {
1309         crate::tests::memchr::Runner::new(2).forward_iter(
1310             |haystack, needles| {
1311                 let n1 = needles.get(0).copied()?;
1312                 let n2 = needles.get(1).copied()?;
1313                 Some(Two::new(n1, n2)?.iter(haystack).collect())
1314             },
1315         )
1316     }
1317 
1318     #[test]
reverse_two()1319     fn reverse_two() {
1320         crate::tests::memchr::Runner::new(2).reverse_iter(
1321             |haystack, needles| {
1322                 let n1 = needles.get(0).copied()?;
1323                 let n2 = needles.get(1).copied()?;
1324                 Some(Two::new(n1, n2)?.iter(haystack).rev().collect())
1325             },
1326         )
1327     }
1328 
1329     #[test]
forward_three()1330     fn forward_three() {
1331         crate::tests::memchr::Runner::new(3).forward_iter(
1332             |haystack, needles| {
1333                 let n1 = needles.get(0).copied()?;
1334                 let n2 = needles.get(1).copied()?;
1335                 let n3 = needles.get(2).copied()?;
1336                 Some(Three::new(n1, n2, n3)?.iter(haystack).collect())
1337             },
1338         )
1339     }
1340 
1341     #[test]
reverse_three()1342     fn reverse_three() {
1343         crate::tests::memchr::Runner::new(3).reverse_iter(
1344             |haystack, needles| {
1345                 let n1 = needles.get(0).copied()?;
1346                 let n2 = needles.get(1).copied()?;
1347                 let n3 = needles.get(2).copied()?;
1348                 Some(Three::new(n1, n2, n3)?.iter(haystack).rev().collect())
1349             },
1350         )
1351     }
1352 }
1353