1 /*!
2 This module defines 128-bit vector implementations of `memchr` and friends.
3 
4 The main types in this module are [`One`], [`Two`] and [`Three`]. They are for
5 searching for one, two or three distinct bytes, respectively, in a haystack.
6 Each type also has corresponding double ended iterators. These searchers are
7 typically much faster than scalar routines accomplishing the same task.
8 
9 The `One` searcher also provides a [`One::count`] routine for efficiently
10 counting the number of times a single byte occurs in a haystack. This is
11 useful, for example, for counting the number of lines in a haystack. This
12 routine exists because it is usually faster, especially with a high match
13 count, then using [`One::find`] repeatedly. ([`OneIter`] specializes its
14 `Iterator::count` implementation to use this routine.)
15 
16 Only one, two and three bytes are supported because three bytes is about
17 the point where one sees diminishing returns. Beyond this point and it's
18 probably (but not necessarily) better to just use a simple `[bool; 256]` array
19 or similar. However, it depends mightily on the specific work-load and the
20 expected match frequency.
21 */
22 
23 use core::arch::wasm32::v128;
24 
25 use crate::{arch::generic::memchr as generic, ext::Pointer, vector::Vector};
26 
27 /// Finds all occurrences of a single byte in a haystack.
28 #[derive(Clone, Copy, Debug)]
29 pub struct One(generic::One<v128>);
30 
31 impl One {
32     /// Create a new searcher that finds occurrences of the needle byte given.
33     ///
34     /// This particular searcher is specialized to use simd128 vector
35     /// instructions that typically make it quite fast.
36     ///
37     /// If simd128 is unavailable in the current environment, then `None` is
38     /// returned.
39     #[inline]
new(needle: u8) -> Option<One>40     pub fn new(needle: u8) -> Option<One> {
41         if One::is_available() {
42             // SAFETY: we check that simd128 is available above.
43             unsafe { Some(One::new_unchecked(needle)) }
44         } else {
45             None
46         }
47     }
48 
49     /// Create a new finder specific to simd128 vectors and routines without
50     /// checking that simd128 is available.
51     ///
52     /// # Safety
53     ///
54     /// Callers must guarantee that it is safe to execute `simd128`
55     /// instructions in the current environment.
56     #[target_feature(enable = "simd128")]
57     #[inline]
new_unchecked(needle: u8) -> One58     pub unsafe fn new_unchecked(needle: u8) -> One {
59         One(generic::One::new(needle))
60     }
61 
62     /// Returns true when this implementation is available in the current
63     /// environment.
64     ///
65     /// When this is true, it is guaranteed that [`One::new`] will return
66     /// a `Some` value. Similarly, when it is false, it is guaranteed that
67     /// `One::new` will return a `None` value.
68     ///
69     /// Note also that for the lifetime of a single program, if this returns
70     /// true then it will always return true.
71     #[inline]
is_available() -> bool72     pub fn is_available() -> bool {
73         #[cfg(target_feature = "simd128")]
74         {
75             true
76         }
77         #[cfg(not(target_feature = "simd128"))]
78         {
79             false
80         }
81     }
82 
83     /// Return the first occurrence of one of the needle bytes in the given
84     /// haystack. If no such occurrence exists, then `None` is returned.
85     ///
86     /// The occurrence is reported as an offset into `haystack`. Its maximum
87     /// value is `haystack.len() - 1`.
88     #[inline]
find(&self, haystack: &[u8]) -> Option<usize>89     pub fn find(&self, haystack: &[u8]) -> Option<usize> {
90         // SAFETY: `find_raw` guarantees that if a pointer is returned, it
91         // falls within the bounds of the start and end pointers.
92         unsafe {
93             generic::search_slice_with_raw(haystack, |s, e| {
94                 self.find_raw(s, e)
95             })
96         }
97     }
98 
99     /// Return the last occurrence of one of the needle bytes in the given
100     /// haystack. If no such occurrence exists, then `None` is returned.
101     ///
102     /// The occurrence is reported as an offset into `haystack`. Its maximum
103     /// value is `haystack.len() - 1`.
104     #[inline]
rfind(&self, haystack: &[u8]) -> Option<usize>105     pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
106         // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it
107         // falls within the bounds of the start and end pointers.
108         unsafe {
109             generic::search_slice_with_raw(haystack, |s, e| {
110                 self.rfind_raw(s, e)
111             })
112         }
113     }
114 
115     /// Counts all occurrences of this byte in the given haystack.
116     #[inline]
count(&self, haystack: &[u8]) -> usize117     pub fn count(&self, haystack: &[u8]) -> usize {
118         // SAFETY: All of our pointers are derived directly from a borrowed
119         // slice, which is guaranteed to be valid.
120         unsafe {
121             let start = haystack.as_ptr();
122             let end = start.add(haystack.len());
123             self.count_raw(start, end)
124         }
125     }
126 
127     /// Like `find`, but accepts and returns raw pointers.
128     ///
129     /// When a match is found, the pointer returned is guaranteed to be
130     /// `>= start` and `< end`.
131     ///
132     /// This routine is useful if you're already using raw pointers and would
133     /// like to avoid converting back to a slice before executing a search.
134     ///
135     /// # Safety
136     ///
137     /// * Both `start` and `end` must be valid for reads.
138     /// * Both `start` and `end` must point to an initialized value.
139     /// * Both `start` and `end` must point to the same allocated object and
140     /// must either be in bounds or at most one byte past the end of the
141     /// allocated object.
142     /// * Both `start` and `end` must be _derived from_ a pointer to the same
143     /// object.
144     /// * The distance between `start` and `end` must not overflow `isize`.
145     /// * The distance being in bounds must not rely on "wrapping around" the
146     /// address space.
147     ///
148     /// Note that callers may pass a pair of pointers such that `start >= end`.
149     /// In that case, `None` will always be returned.
150     #[inline]
find_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>151     pub unsafe fn find_raw(
152         &self,
153         start: *const u8,
154         end: *const u8,
155     ) -> Option<*const u8> {
156         if start >= end {
157             return None;
158         }
159         if end.distance(start) < v128::BYTES {
160             // SAFETY: We require the caller to pass valid start/end pointers.
161             return generic::fwd_byte_by_byte(start, end, |b| {
162                 b == self.0.needle1()
163             });
164         }
165         // SAFETY: Building a `One` means it's safe to call 'simd128' routines.
166         // Also, we've checked that our haystack is big enough to run on the
167         // vector routine. Pointer validity is caller's responsibility.
168         self.find_raw_impl(start, end)
169     }
170 
171     /// Like `rfind`, but accepts and returns raw pointers.
172     ///
173     /// When a match is found, the pointer returned is guaranteed to be
174     /// `>= start` and `< end`.
175     ///
176     /// This routine is useful if you're already using raw pointers and would
177     /// like to avoid converting back to a slice before executing a search.
178     ///
179     /// # Safety
180     ///
181     /// * Both `start` and `end` must be valid for reads.
182     /// * Both `start` and `end` must point to an initialized value.
183     /// * Both `start` and `end` must point to the same allocated object and
184     /// must either be in bounds or at most one byte past the end of the
185     /// allocated object.
186     /// * Both `start` and `end` must be _derived from_ a pointer to the same
187     /// object.
188     /// * The distance between `start` and `end` must not overflow `isize`.
189     /// * The distance being in bounds must not rely on "wrapping around" the
190     /// address space.
191     ///
192     /// Note that callers may pass a pair of pointers such that `start >= end`.
193     /// In that case, `None` will always be returned.
194     #[inline]
rfind_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>195     pub unsafe fn rfind_raw(
196         &self,
197         start: *const u8,
198         end: *const u8,
199     ) -> Option<*const u8> {
200         if start >= end {
201             return None;
202         }
203         if end.distance(start) < v128::BYTES {
204             // SAFETY: We require the caller to pass valid start/end pointers.
205             return generic::rev_byte_by_byte(start, end, |b| {
206                 b == self.0.needle1()
207             });
208         }
209         // SAFETY: Building a `One` means it's safe to call 'simd128' routines.
210         // Also, we've checked that our haystack is big enough to run on the
211         // vector routine. Pointer validity is caller's responsibility.
212         self.rfind_raw_impl(start, end)
213     }
214 
215     /// Counts all occurrences of this byte in the given haystack represented
216     /// by raw pointers.
217     ///
218     /// When a match is found, the pointer returned is guaranteed to be
219     /// `>= start` and `< end`.
220     ///
221     /// This routine is useful if you're already using raw pointers and would
222     /// like to avoid converting back to a slice before executing a search.
223     ///
224     /// # Safety
225     ///
226     /// * Both `start` and `end` must be valid for reads.
227     /// * Both `start` and `end` must point to an initialized value.
228     /// * Both `start` and `end` must point to the same allocated object and
229     /// must either be in bounds or at most one byte past the end of the
230     /// allocated object.
231     /// * Both `start` and `end` must be _derived from_ a pointer to the same
232     /// object.
233     /// * The distance between `start` and `end` must not overflow `isize`.
234     /// * The distance being in bounds must not rely on "wrapping around" the
235     /// address space.
236     ///
237     /// Note that callers may pass a pair of pointers such that `start >= end`.
238     /// In that case, `None` will always be returned.
239     #[inline]
count_raw(&self, start: *const u8, end: *const u8) -> usize240     pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize {
241         if start >= end {
242             return 0;
243         }
244         if end.distance(start) < v128::BYTES {
245             // SAFETY: We require the caller to pass valid start/end pointers.
246             return generic::count_byte_by_byte(start, end, |b| {
247                 b == self.0.needle1()
248             });
249         }
250         // SAFETY: Building a `One` means it's safe to call 'simd128' routines.
251         // Also, we've checked that our haystack is big enough to run on the
252         // vector routine. Pointer validity is caller's responsibility.
253         self.count_raw_impl(start, end)
254     }
255 
256     /// Execute a search using simd128 vectors and routines.
257     ///
258     /// # Safety
259     ///
260     /// Same as [`One::find_raw`], except the distance between `start` and
261     /// `end` must be at least the size of a simd128 vector (in bytes).
262     ///
263     /// (The target feature safety obligation is automatically fulfilled by
264     /// virtue of being a method on `One`, which can only be constructed
265     /// when it is safe to call `simd128` routines.)
266     #[target_feature(enable = "simd128")]
267     #[inline]
find_raw_impl( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>268     unsafe fn find_raw_impl(
269         &self,
270         start: *const u8,
271         end: *const u8,
272     ) -> Option<*const u8> {
273         self.0.find_raw(start, end)
274     }
275 
276     /// Execute a search using simd128 vectors and routines.
277     ///
278     /// # Safety
279     ///
280     /// Same as [`One::rfind_raw`], except the distance between `start` and
281     /// `end` must be at least the size of a simd128 vector (in bytes).
282     ///
283     /// (The target feature safety obligation is automatically fulfilled by
284     /// virtue of being a method on `One`, which can only be constructed
285     /// when it is safe to call `simd128` routines.)
286     #[target_feature(enable = "simd128")]
287     #[inline]
rfind_raw_impl( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>288     unsafe fn rfind_raw_impl(
289         &self,
290         start: *const u8,
291         end: *const u8,
292     ) -> Option<*const u8> {
293         self.0.rfind_raw(start, end)
294     }
295 
296     /// Execute a count using simd128 vectors and routines.
297     ///
298     /// # Safety
299     ///
300     /// Same as [`One::count_raw`], except the distance between `start` and
301     /// `end` must be at least the size of a simd128 vector (in bytes).
302     ///
303     /// (The target feature safety obligation is automatically fulfilled by
304     /// virtue of being a method on `One`, which can only be constructed
305     /// when it is safe to call `simd128` routines.)
306     #[target_feature(enable = "simd128")]
307     #[inline]
count_raw_impl( &self, start: *const u8, end: *const u8, ) -> usize308     unsafe fn count_raw_impl(
309         &self,
310         start: *const u8,
311         end: *const u8,
312     ) -> usize {
313         self.0.count_raw(start, end)
314     }
315 
316     /// Returns an iterator over all occurrences of the needle byte in the
317     /// given haystack.
318     ///
319     /// The iterator returned implements `DoubleEndedIterator`. This means it
320     /// can also be used to find occurrences in reverse order.
321     #[inline]
iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h>322     pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> {
323         OneIter { searcher: self, it: generic::Iter::new(haystack) }
324     }
325 }
326 
327 /// An iterator over all occurrences of a single byte in a haystack.
328 ///
329 /// This iterator implements `DoubleEndedIterator`, which means it can also be
330 /// used to find occurrences in reverse order.
331 ///
332 /// This iterator is created by the [`One::iter`] method.
333 ///
334 /// The lifetime parameters are as follows:
335 ///
336 /// * `'a` refers to the lifetime of the underlying [`One`] searcher.
337 /// * `'h` refers to the lifetime of the haystack being searched.
338 #[derive(Clone, Debug)]
339 pub struct OneIter<'a, 'h> {
340     searcher: &'a One,
341     it: generic::Iter<'h>,
342 }
343 
344 impl<'a, 'h> Iterator for OneIter<'a, 'h> {
345     type Item = usize;
346 
347     #[inline]
next(&mut self) -> Option<usize>348     fn next(&mut self) -> Option<usize> {
349         // SAFETY: We rely on the generic iterator to provide valid start
350         // and end pointers, but we guarantee that any pointer returned by
351         // 'find_raw' falls within the bounds of the start and end pointer.
352         unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
353     }
354 
355     #[inline]
count(self) -> usize356     fn count(self) -> usize {
357         self.it.count(|s, e| {
358             // SAFETY: We rely on our generic iterator to return valid start
359             // and end pointers.
360             unsafe { self.searcher.count_raw(s, e) }
361         })
362     }
363 
364     #[inline]
size_hint(&self) -> (usize, Option<usize>)365     fn size_hint(&self) -> (usize, Option<usize>) {
366         self.it.size_hint()
367     }
368 }
369 
370 impl<'a, 'h> DoubleEndedIterator for OneIter<'a, 'h> {
371     #[inline]
next_back(&mut self) -> Option<usize>372     fn next_back(&mut self) -> Option<usize> {
373         // SAFETY: We rely on the generic iterator to provide valid start
374         // and end pointers, but we guarantee that any pointer returned by
375         // 'rfind_raw' falls within the bounds of the start and end pointer.
376         unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
377     }
378 }
379 
380 impl<'a, 'h> core::iter::FusedIterator for OneIter<'a, 'h> {}
381 
382 /// Finds all occurrences of two bytes in a haystack.
383 ///
384 /// That is, this reports matches of one of two possible bytes. For example,
385 /// searching for `a` or `b` in `afoobar` would report matches at offsets `0`,
386 /// `4` and `5`.
387 #[derive(Clone, Copy, Debug)]
388 pub struct Two(generic::Two<v128>);
389 
390 impl Two {
391     /// Create a new searcher that finds occurrences of the needle bytes given.
392     ///
393     /// This particular searcher is specialized to use simd128 vector
394     /// instructions that typically make it quite fast.
395     ///
396     /// If simd128 is unavailable in the current environment, then `None` is
397     /// returned.
398     #[inline]
new(needle1: u8, needle2: u8) -> Option<Two>399     pub fn new(needle1: u8, needle2: u8) -> Option<Two> {
400         if Two::is_available() {
401             // SAFETY: we check that simd128 is available above.
402             unsafe { Some(Two::new_unchecked(needle1, needle2)) }
403         } else {
404             None
405         }
406     }
407 
408     /// Create a new finder specific to simd128 vectors and routines without
409     /// checking that simd128 is available.
410     ///
411     /// # Safety
412     ///
413     /// Callers must guarantee that it is safe to execute `simd128`
414     /// instructions in the current environment.
415     #[target_feature(enable = "simd128")]
416     #[inline]
new_unchecked(needle1: u8, needle2: u8) -> Two417     pub unsafe fn new_unchecked(needle1: u8, needle2: u8) -> Two {
418         Two(generic::Two::new(needle1, needle2))
419     }
420 
421     /// Returns true when this implementation is available in the current
422     /// environment.
423     ///
424     /// When this is true, it is guaranteed that [`Two::new`] will return
425     /// a `Some` value. Similarly, when it is false, it is guaranteed that
426     /// `Two::new` will return a `None` value.
427     ///
428     /// Note also that for the lifetime of a single program, if this returns
429     /// true then it will always return true.
430     #[inline]
is_available() -> bool431     pub fn is_available() -> bool {
432         #[cfg(target_feature = "simd128")]
433         {
434             true
435         }
436         #[cfg(not(target_feature = "simd128"))]
437         {
438             false
439         }
440     }
441 
442     /// Return the first occurrence of one of the needle bytes in the given
443     /// haystack. If no such occurrence exists, then `None` is returned.
444     ///
445     /// The occurrence is reported as an offset into `haystack`. Its maximum
446     /// value is `haystack.len() - 1`.
447     #[inline]
find(&self, haystack: &[u8]) -> Option<usize>448     pub fn find(&self, haystack: &[u8]) -> Option<usize> {
449         // SAFETY: `find_raw` guarantees that if a pointer is returned, it
450         // falls within the bounds of the start and end pointers.
451         unsafe {
452             generic::search_slice_with_raw(haystack, |s, e| {
453                 self.find_raw(s, e)
454             })
455         }
456     }
457 
458     /// Return the last occurrence of one of the needle bytes in the given
459     /// haystack. If no such occurrence exists, then `None` is returned.
460     ///
461     /// The occurrence is reported as an offset into `haystack`. Its maximum
462     /// value is `haystack.len() - 1`.
463     #[inline]
rfind(&self, haystack: &[u8]) -> Option<usize>464     pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
465         // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it
466         // falls within the bounds of the start and end pointers.
467         unsafe {
468             generic::search_slice_with_raw(haystack, |s, e| {
469                 self.rfind_raw(s, e)
470             })
471         }
472     }
473 
474     /// Like `find`, but accepts and returns raw pointers.
475     ///
476     /// When a match is found, the pointer returned is guaranteed to be
477     /// `>= start` and `< end`.
478     ///
479     /// This routine is useful if you're already using raw pointers and would
480     /// like to avoid converting back to a slice before executing a search.
481     ///
482     /// # Safety
483     ///
484     /// * Both `start` and `end` must be valid for reads.
485     /// * Both `start` and `end` must point to an initialized value.
486     /// * Both `start` and `end` must point to the same allocated object and
487     /// must either be in bounds or at most one byte past the end of the
488     /// allocated object.
489     /// * Both `start` and `end` must be _derived from_ a pointer to the same
490     /// object.
491     /// * The distance between `start` and `end` must not overflow `isize`.
492     /// * The distance being in bounds must not rely on "wrapping around" the
493     /// address space.
494     ///
495     /// Note that callers may pass a pair of pointers such that `start >= end`.
496     /// In that case, `None` will always be returned.
497     #[inline]
find_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>498     pub unsafe fn find_raw(
499         &self,
500         start: *const u8,
501         end: *const u8,
502     ) -> Option<*const u8> {
503         if start >= end {
504             return None;
505         }
506         if end.distance(start) < v128::BYTES {
507             // SAFETY: We require the caller to pass valid start/end pointers.
508             return generic::fwd_byte_by_byte(start, end, |b| {
509                 b == self.0.needle1() || b == self.0.needle2()
510             });
511         }
512         // SAFETY: Building a `Two` means it's safe to call 'simd128' routines.
513         // Also, we've checked that our haystack is big enough to run on the
514         // vector routine. Pointer validity is caller's responsibility.
515         self.find_raw_impl(start, end)
516     }
517 
518     /// Like `rfind`, but accepts and returns raw pointers.
519     ///
520     /// When a match is found, the pointer returned is guaranteed to be
521     /// `>= start` and `< end`.
522     ///
523     /// This routine is useful if you're already using raw pointers and would
524     /// like to avoid converting back to a slice before executing a search.
525     ///
526     /// # Safety
527     ///
528     /// * Both `start` and `end` must be valid for reads.
529     /// * Both `start` and `end` must point to an initialized value.
530     /// * Both `start` and `end` must point to the same allocated object and
531     /// must either be in bounds or at most one byte past the end of the
532     /// allocated object.
533     /// * Both `start` and `end` must be _derived from_ a pointer to the same
534     /// object.
535     /// * The distance between `start` and `end` must not overflow `isize`.
536     /// * The distance being in bounds must not rely on "wrapping around" the
537     /// address space.
538     ///
539     /// Note that callers may pass a pair of pointers such that `start >= end`.
540     /// In that case, `None` will always be returned.
541     #[inline]
rfind_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>542     pub unsafe fn rfind_raw(
543         &self,
544         start: *const u8,
545         end: *const u8,
546     ) -> Option<*const u8> {
547         if start >= end {
548             return None;
549         }
550         if end.distance(start) < v128::BYTES {
551             // SAFETY: We require the caller to pass valid start/end pointers.
552             return generic::rev_byte_by_byte(start, end, |b| {
553                 b == self.0.needle1() || b == self.0.needle2()
554             });
555         }
556         // SAFETY: Building a `Two` means it's safe to call 'simd128' routines.
557         // Also, we've checked that our haystack is big enough to run on the
558         // vector routine. Pointer validity is caller's responsibility.
559         self.rfind_raw_impl(start, end)
560     }
561 
562     /// Execute a search using simd128 vectors and routines.
563     ///
564     /// # Safety
565     ///
566     /// Same as [`Two::find_raw`], except the distance between `start` and
567     /// `end` must be at least the size of a simd128 vector (in bytes).
568     ///
569     /// (The target feature safety obligation is automatically fulfilled by
570     /// virtue of being a method on `Two`, which can only be constructed
571     /// when it is safe to call `simd128` routines.)
572     #[target_feature(enable = "simd128")]
573     #[inline]
find_raw_impl( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>574     unsafe fn find_raw_impl(
575         &self,
576         start: *const u8,
577         end: *const u8,
578     ) -> Option<*const u8> {
579         self.0.find_raw(start, end)
580     }
581 
582     /// Execute a search using simd128 vectors and routines.
583     ///
584     /// # Safety
585     ///
586     /// Same as [`Two::rfind_raw`], except the distance between `start` and
587     /// `end` must be at least the size of a simd128 vector (in bytes).
588     ///
589     /// (The target feature safety obligation is automatically fulfilled by
590     /// virtue of being a method on `Two`, which can only be constructed
591     /// when it is safe to call `simd128` routines.)
592     #[target_feature(enable = "simd128")]
593     #[inline]
rfind_raw_impl( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>594     unsafe fn rfind_raw_impl(
595         &self,
596         start: *const u8,
597         end: *const u8,
598     ) -> Option<*const u8> {
599         self.0.rfind_raw(start, end)
600     }
601 
602     /// Returns an iterator over all occurrences of the needle bytes in the
603     /// given haystack.
604     ///
605     /// The iterator returned implements `DoubleEndedIterator`. This means it
606     /// can also be used to find occurrences in reverse order.
607     #[inline]
iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h>608     pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> {
609         TwoIter { searcher: self, it: generic::Iter::new(haystack) }
610     }
611 }
612 
613 /// An iterator over all occurrences of two possible bytes in a haystack.
614 ///
615 /// This iterator implements `DoubleEndedIterator`, which means it can also be
616 /// used to find occurrences in reverse order.
617 ///
618 /// This iterator is created by the [`Two::iter`] method.
619 ///
620 /// The lifetime parameters are as follows:
621 ///
622 /// * `'a` refers to the lifetime of the underlying [`Two`] searcher.
623 /// * `'h` refers to the lifetime of the haystack being searched.
624 #[derive(Clone, Debug)]
625 pub struct TwoIter<'a, 'h> {
626     searcher: &'a Two,
627     it: generic::Iter<'h>,
628 }
629 
630 impl<'a, 'h> Iterator for TwoIter<'a, 'h> {
631     type Item = usize;
632 
633     #[inline]
next(&mut self) -> Option<usize>634     fn next(&mut self) -> Option<usize> {
635         // SAFETY: We rely on the generic iterator to provide valid start
636         // and end pointers, but we guarantee that any pointer returned by
637         // 'find_raw' falls within the bounds of the start and end pointer.
638         unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
639     }
640 
641     #[inline]
size_hint(&self) -> (usize, Option<usize>)642     fn size_hint(&self) -> (usize, Option<usize>) {
643         self.it.size_hint()
644     }
645 }
646 
647 impl<'a, 'h> DoubleEndedIterator for TwoIter<'a, 'h> {
648     #[inline]
next_back(&mut self) -> Option<usize>649     fn next_back(&mut self) -> Option<usize> {
650         // SAFETY: We rely on the generic iterator to provide valid start
651         // and end pointers, but we guarantee that any pointer returned by
652         // 'rfind_raw' falls within the bounds of the start and end pointer.
653         unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
654     }
655 }
656 
657 impl<'a, 'h> core::iter::FusedIterator for TwoIter<'a, 'h> {}
658 
659 /// Finds all occurrences of three bytes in a haystack.
660 ///
661 /// That is, this reports matches of one of three possible bytes. For example,
662 /// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets
663 /// `0`, `2`, `3`, `4` and `5`.
664 #[derive(Clone, Copy, Debug)]
665 pub struct Three(generic::Three<v128>);
666 
667 impl Three {
668     /// Create a new searcher that finds occurrences of the needle bytes given.
669     ///
670     /// This particular searcher is specialized to use simd128 vector
671     /// instructions that typically make it quite fast.
672     ///
673     /// If simd128 is unavailable in the current environment, then `None` is
674     /// returned.
675     #[inline]
new(needle1: u8, needle2: u8, needle3: u8) -> Option<Three>676     pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Option<Three> {
677         if Three::is_available() {
678             // SAFETY: we check that simd128 is available above.
679             unsafe { Some(Three::new_unchecked(needle1, needle2, needle3)) }
680         } else {
681             None
682         }
683     }
684 
685     /// Create a new finder specific to simd128 vectors and routines without
686     /// checking that simd128 is available.
687     ///
688     /// # Safety
689     ///
690     /// Callers must guarantee that it is safe to execute `simd128`
691     /// instructions in the current environment.
692     #[target_feature(enable = "simd128")]
693     #[inline]
new_unchecked( needle1: u8, needle2: u8, needle3: u8, ) -> Three694     pub unsafe fn new_unchecked(
695         needle1: u8,
696         needle2: u8,
697         needle3: u8,
698     ) -> Three {
699         Three(generic::Three::new(needle1, needle2, needle3))
700     }
701 
702     /// Returns true when this implementation is available in the current
703     /// environment.
704     ///
705     /// When this is true, it is guaranteed that [`Three::new`] will return
706     /// a `Some` value. Similarly, when it is false, it is guaranteed that
707     /// `Three::new` will return a `None` value.
708     ///
709     /// Note also that for the lifetime of a single program, if this returns
710     /// true then it will always return true.
711     #[inline]
is_available() -> bool712     pub fn is_available() -> bool {
713         #[cfg(target_feature = "simd128")]
714         {
715             true
716         }
717         #[cfg(not(target_feature = "simd128"))]
718         {
719             false
720         }
721     }
722 
723     /// Return the first occurrence of one of the needle bytes in the given
724     /// haystack. If no such occurrence exists, then `None` is returned.
725     ///
726     /// The occurrence is reported as an offset into `haystack`. Its maximum
727     /// value is `haystack.len() - 1`.
728     #[inline]
find(&self, haystack: &[u8]) -> Option<usize>729     pub fn find(&self, haystack: &[u8]) -> Option<usize> {
730         // SAFETY: `find_raw` guarantees that if a pointer is returned, it
731         // falls within the bounds of the start and end pointers.
732         unsafe {
733             generic::search_slice_with_raw(haystack, |s, e| {
734                 self.find_raw(s, e)
735             })
736         }
737     }
738 
739     /// Return the last occurrence of one of the needle bytes in the given
740     /// haystack. If no such occurrence exists, then `None` is returned.
741     ///
742     /// The occurrence is reported as an offset into `haystack`. Its maximum
743     /// value is `haystack.len() - 1`.
744     #[inline]
rfind(&self, haystack: &[u8]) -> Option<usize>745     pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
746         // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it
747         // falls within the bounds of the start and end pointers.
748         unsafe {
749             generic::search_slice_with_raw(haystack, |s, e| {
750                 self.rfind_raw(s, e)
751             })
752         }
753     }
754 
755     /// Like `find`, but accepts and returns raw pointers.
756     ///
757     /// When a match is found, the pointer returned is guaranteed to be
758     /// `>= start` and `< end`.
759     ///
760     /// This routine is useful if you're already using raw pointers and would
761     /// like to avoid converting back to a slice before executing a search.
762     ///
763     /// # Safety
764     ///
765     /// * Both `start` and `end` must be valid for reads.
766     /// * Both `start` and `end` must point to an initialized value.
767     /// * Both `start` and `end` must point to the same allocated object and
768     /// must either be in bounds or at most one byte past the end of the
769     /// allocated object.
770     /// * Both `start` and `end` must be _derived from_ a pointer to the same
771     /// object.
772     /// * The distance between `start` and `end` must not overflow `isize`.
773     /// * The distance being in bounds must not rely on "wrapping around" the
774     /// address space.
775     ///
776     /// Note that callers may pass a pair of pointers such that `start >= end`.
777     /// In that case, `None` will always be returned.
778     #[inline]
find_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>779     pub unsafe fn find_raw(
780         &self,
781         start: *const u8,
782         end: *const u8,
783     ) -> Option<*const u8> {
784         if start >= end {
785             return None;
786         }
787         if end.distance(start) < v128::BYTES {
788             // SAFETY: We require the caller to pass valid start/end pointers.
789             return generic::fwd_byte_by_byte(start, end, |b| {
790                 b == self.0.needle1()
791                     || b == self.0.needle2()
792                     || b == self.0.needle3()
793             });
794         }
795         // SAFETY: Building a `Three` means it's safe to call 'simd128'
796         // routines. Also, we've checked that our haystack is big enough to run
797         // on the vector routine. Pointer validity is caller's responsibility.
798         self.find_raw_impl(start, end)
799     }
800 
801     /// Like `rfind`, but accepts and returns raw pointers.
802     ///
803     /// When a match is found, the pointer returned is guaranteed to be
804     /// `>= start` and `< end`.
805     ///
806     /// This routine is useful if you're already using raw pointers and would
807     /// like to avoid converting back to a slice before executing a search.
808     ///
809     /// # Safety
810     ///
811     /// * Both `start` and `end` must be valid for reads.
812     /// * Both `start` and `end` must point to an initialized value.
813     /// * Both `start` and `end` must point to the same allocated object and
814     /// must either be in bounds or at most one byte past the end of the
815     /// allocated object.
816     /// * Both `start` and `end` must be _derived from_ a pointer to the same
817     /// object.
818     /// * The distance between `start` and `end` must not overflow `isize`.
819     /// * The distance being in bounds must not rely on "wrapping around" the
820     /// address space.
821     ///
822     /// Note that callers may pass a pair of pointers such that `start >= end`.
823     /// In that case, `None` will always be returned.
824     #[inline]
rfind_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>825     pub unsafe fn rfind_raw(
826         &self,
827         start: *const u8,
828         end: *const u8,
829     ) -> Option<*const u8> {
830         if start >= end {
831             return None;
832         }
833         if end.distance(start) < v128::BYTES {
834             // SAFETY: We require the caller to pass valid start/end pointers.
835             return generic::rev_byte_by_byte(start, end, |b| {
836                 b == self.0.needle1()
837                     || b == self.0.needle2()
838                     || b == self.0.needle3()
839             });
840         }
841         // SAFETY: Building a `Three` means it's safe to call 'simd128'
842         // routines. Also, we've checked that our haystack is big enough to run
843         // on the vector routine. Pointer validity is caller's responsibility.
844         self.rfind_raw_impl(start, end)
845     }
846 
847     /// Execute a search using simd128 vectors and routines.
848     ///
849     /// # Safety
850     ///
851     /// Same as [`Three::find_raw`], except the distance between `start` and
852     /// `end` must be at least the size of a simd128 vector (in bytes).
853     ///
854     /// (The target feature safety obligation is automatically fulfilled by
855     /// virtue of being a method on `Three`, which can only be constructed
856     /// when it is safe to call `simd128` routines.)
857     #[target_feature(enable = "simd128")]
858     #[inline]
find_raw_impl( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>859     unsafe fn find_raw_impl(
860         &self,
861         start: *const u8,
862         end: *const u8,
863     ) -> Option<*const u8> {
864         self.0.find_raw(start, end)
865     }
866 
867     /// Execute a search using simd128 vectors and routines.
868     ///
869     /// # Safety
870     ///
871     /// Same as [`Three::rfind_raw`], except the distance between `start` and
872     /// `end` must be at least the size of a simd128 vector (in bytes).
873     ///
874     /// (The target feature safety obligation is automatically fulfilled by
875     /// virtue of being a method on `Three`, which can only be constructed
876     /// when it is safe to call `simd128` routines.)
877     #[target_feature(enable = "simd128")]
878     #[inline]
rfind_raw_impl( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>879     unsafe fn rfind_raw_impl(
880         &self,
881         start: *const u8,
882         end: *const u8,
883     ) -> Option<*const u8> {
884         self.0.rfind_raw(start, end)
885     }
886 
887     /// Returns an iterator over all occurrences of the needle byte in the
888     /// given haystack.
889     ///
890     /// The iterator returned implements `DoubleEndedIterator`. This means it
891     /// can also be used to find occurrences in reverse order.
892     #[inline]
iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h>893     pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> {
894         ThreeIter { searcher: self, it: generic::Iter::new(haystack) }
895     }
896 }
897 
898 /// An iterator over all occurrences of three possible bytes in a haystack.
899 ///
900 /// This iterator implements `DoubleEndedIterator`, which means it can also be
901 /// used to find occurrences in reverse order.
902 ///
903 /// This iterator is created by the [`Three::iter`] method.
904 ///
905 /// The lifetime parameters are as follows:
906 ///
907 /// * `'a` refers to the lifetime of the underlying [`Three`] searcher.
908 /// * `'h` refers to the lifetime of the haystack being searched.
909 #[derive(Clone, Debug)]
910 pub struct ThreeIter<'a, 'h> {
911     searcher: &'a Three,
912     it: generic::Iter<'h>,
913 }
914 
915 impl<'a, 'h> Iterator for ThreeIter<'a, 'h> {
916     type Item = usize;
917 
918     #[inline]
next(&mut self) -> Option<usize>919     fn next(&mut self) -> Option<usize> {
920         // SAFETY: We rely on the generic iterator to provide valid start
921         // and end pointers, but we guarantee that any pointer returned by
922         // 'find_raw' falls within the bounds of the start and end pointer.
923         unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
924     }
925 
926     #[inline]
size_hint(&self) -> (usize, Option<usize>)927     fn size_hint(&self) -> (usize, Option<usize>) {
928         self.it.size_hint()
929     }
930 }
931 
932 impl<'a, 'h> DoubleEndedIterator for ThreeIter<'a, 'h> {
933     #[inline]
next_back(&mut self) -> Option<usize>934     fn next_back(&mut self) -> Option<usize> {
935         // SAFETY: We rely on the generic iterator to provide valid start
936         // and end pointers, but we guarantee that any pointer returned by
937         // 'rfind_raw' falls within the bounds of the start and end pointer.
938         unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
939     }
940 }
941 
942 impl<'a, 'h> core::iter::FusedIterator for ThreeIter<'a, 'h> {}
943 
944 #[cfg(test)]
945 mod tests {
946     use super::*;
947 
948     define_memchr_quickcheck!(super);
949 
950     #[test]
forward_one()951     fn forward_one() {
952         crate::tests::memchr::Runner::new(1).forward_iter(
953             |haystack, needles| {
954                 Some(One::new(needles[0])?.iter(haystack).collect())
955             },
956         )
957     }
958 
959     #[test]
reverse_one()960     fn reverse_one() {
961         crate::tests::memchr::Runner::new(1).reverse_iter(
962             |haystack, needles| {
963                 Some(One::new(needles[0])?.iter(haystack).rev().collect())
964             },
965         )
966     }
967 
968     #[test]
count_one()969     fn count_one() {
970         crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| {
971             Some(One::new(needles[0])?.iter(haystack).count())
972         })
973     }
974 
975     #[test]
forward_two()976     fn forward_two() {
977         crate::tests::memchr::Runner::new(2).forward_iter(
978             |haystack, needles| {
979                 let n1 = needles.get(0).copied()?;
980                 let n2 = needles.get(1).copied()?;
981                 Some(Two::new(n1, n2)?.iter(haystack).collect())
982             },
983         )
984     }
985 
986     #[test]
reverse_two()987     fn reverse_two() {
988         crate::tests::memchr::Runner::new(2).reverse_iter(
989             |haystack, needles| {
990                 let n1 = needles.get(0).copied()?;
991                 let n2 = needles.get(1).copied()?;
992                 Some(Two::new(n1, n2)?.iter(haystack).rev().collect())
993             },
994         )
995     }
996 
997     #[test]
forward_three()998     fn forward_three() {
999         crate::tests::memchr::Runner::new(3).forward_iter(
1000             |haystack, needles| {
1001                 let n1 = needles.get(0).copied()?;
1002                 let n2 = needles.get(1).copied()?;
1003                 let n3 = needles.get(2).copied()?;
1004                 Some(Three::new(n1, n2, n3)?.iter(haystack).collect())
1005             },
1006         )
1007     }
1008 
1009     #[test]
reverse_three()1010     fn reverse_three() {
1011         crate::tests::memchr::Runner::new(3).reverse_iter(
1012             |haystack, needles| {
1013                 let n1 = needles.get(0).copied()?;
1014                 let n2 = needles.get(1).copied()?;
1015                 let n3 = needles.get(2).copied()?;
1016                 Some(Three::new(n1, n2, n3)?.iter(haystack).rev().collect())
1017             },
1018         )
1019     }
1020 }
1021