1 /*!
2 This module defines 128-bit vector implementations of `memchr` and friends.
3 
4 The main types in this module are [`One`], [`Two`] and [`Three`]. They are for
5 searching for one, two or three distinct bytes, respectively, in a haystack.
6 Each type also has corresponding double ended iterators. These searchers are
7 typically much faster than scalar routines accomplishing the same task.
8 
9 The `One` searcher also provides a [`One::count`] routine for efficiently
10 counting the number of times a single byte occurs in a haystack. This is
11 useful, for example, for counting the number of lines in a haystack. This
12 routine exists because it is usually faster, especially with a high match
13 count, then using [`One::find`] repeatedly. ([`OneIter`] specializes its
14 `Iterator::count` implementation to use this routine.)
15 
16 Only one, two and three bytes are supported because three bytes is about
17 the point where one sees diminishing returns. Beyond this point and it's
18 probably (but not necessarily) better to just use a simple `[bool; 256]` array
19 or similar. However, it depends mightily on the specific work-load and the
20 expected match frequency.
21 */
22 
23 use core::arch::x86_64::__m128i;
24 
25 use crate::{arch::generic::memchr as generic, ext::Pointer, vector::Vector};
26 
27 /// Finds all occurrences of a single byte in a haystack.
28 #[derive(Clone, Copy, Debug)]
29 pub struct One(generic::One<__m128i>);
30 
31 impl One {
32     /// Create a new searcher that finds occurrences of the needle byte given.
33     ///
34     /// This particular searcher is specialized to use SSE2 vector instructions
35     /// that typically make it quite fast.
36     ///
37     /// If SSE2 is unavailable in the current environment, then `None` is
38     /// returned.
39     #[inline]
new(needle: u8) -> Option<One>40     pub fn new(needle: u8) -> Option<One> {
41         if One::is_available() {
42             // SAFETY: we check that sse2 is available above.
43             unsafe { Some(One::new_unchecked(needle)) }
44         } else {
45             None
46         }
47     }
48 
49     /// Create a new finder specific to SSE2 vectors and routines without
50     /// checking that SSE2 is available.
51     ///
52     /// # Safety
53     ///
54     /// Callers must guarantee that it is safe to execute `sse2` instructions
55     /// in the current environment.
56     ///
57     /// Note that it is a common misconception that if one compiles for an
58     /// `x86_64` target, then they therefore automatically have access to SSE2
59     /// instructions. While this is almost always the case, it isn't true in
60     /// 100% of cases.
61     #[target_feature(enable = "sse2")]
62     #[inline]
new_unchecked(needle: u8) -> One63     pub unsafe fn new_unchecked(needle: u8) -> One {
64         One(generic::One::new(needle))
65     }
66 
67     /// Returns true when this implementation is available in the current
68     /// environment.
69     ///
70     /// When this is true, it is guaranteed that [`One::new`] will return
71     /// a `Some` value. Similarly, when it is false, it is guaranteed that
72     /// `One::new` will return a `None` value.
73     ///
74     /// Note also that for the lifetime of a single program, if this returns
75     /// true then it will always return true.
76     #[inline]
is_available() -> bool77     pub fn is_available() -> bool {
78         #[cfg(target_feature = "sse2")]
79         {
80             true
81         }
82         #[cfg(not(target_feature = "sse2"))]
83         {
84             false
85         }
86     }
87 
88     /// Return the first occurrence of one of the needle bytes in the given
89     /// haystack. If no such occurrence exists, then `None` is returned.
90     ///
91     /// The occurrence is reported as an offset into `haystack`. Its maximum
92     /// value is `haystack.len() - 1`.
93     #[inline]
find(&self, haystack: &[u8]) -> Option<usize>94     pub fn find(&self, haystack: &[u8]) -> Option<usize> {
95         // SAFETY: `find_raw` guarantees that if a pointer is returned, it
96         // falls within the bounds of the start and end pointers.
97         unsafe {
98             generic::search_slice_with_raw(haystack, |s, e| {
99                 self.find_raw(s, e)
100             })
101         }
102     }
103 
104     /// Return the last occurrence of one of the needle bytes in the given
105     /// haystack. If no such occurrence exists, then `None` is returned.
106     ///
107     /// The occurrence is reported as an offset into `haystack`. Its maximum
108     /// value is `haystack.len() - 1`.
109     #[inline]
rfind(&self, haystack: &[u8]) -> Option<usize>110     pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
111         // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it
112         // falls within the bounds of the start and end pointers.
113         unsafe {
114             generic::search_slice_with_raw(haystack, |s, e| {
115                 self.rfind_raw(s, e)
116             })
117         }
118     }
119 
120     /// Counts all occurrences of this byte in the given haystack.
121     #[inline]
count(&self, haystack: &[u8]) -> usize122     pub fn count(&self, haystack: &[u8]) -> usize {
123         // SAFETY: All of our pointers are derived directly from a borrowed
124         // slice, which is guaranteed to be valid.
125         unsafe {
126             let start = haystack.as_ptr();
127             let end = start.add(haystack.len());
128             self.count_raw(start, end)
129         }
130     }
131 
132     /// Like `find`, but accepts and returns raw pointers.
133     ///
134     /// When a match is found, the pointer returned is guaranteed to be
135     /// `>= start` and `< end`.
136     ///
137     /// This routine is useful if you're already using raw pointers and would
138     /// like to avoid converting back to a slice before executing a search.
139     ///
140     /// # Safety
141     ///
142     /// * Both `start` and `end` must be valid for reads.
143     /// * Both `start` and `end` must point to an initialized value.
144     /// * Both `start` and `end` must point to the same allocated object and
145     /// must either be in bounds or at most one byte past the end of the
146     /// allocated object.
147     /// * Both `start` and `end` must be _derived from_ a pointer to the same
148     /// object.
149     /// * The distance between `start` and `end` must not overflow `isize`.
150     /// * The distance being in bounds must not rely on "wrapping around" the
151     /// address space.
152     ///
153     /// Note that callers may pass a pair of pointers such that `start >= end`.
154     /// In that case, `None` will always be returned.
155     #[inline]
find_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>156     pub unsafe fn find_raw(
157         &self,
158         start: *const u8,
159         end: *const u8,
160     ) -> Option<*const u8> {
161         if start >= end {
162             return None;
163         }
164         if end.distance(start) < __m128i::BYTES {
165             // SAFETY: We require the caller to pass valid start/end pointers.
166             return generic::fwd_byte_by_byte(start, end, |b| {
167                 b == self.0.needle1()
168             });
169         }
170         // SAFETY: Building a `One` means it's safe to call 'sse2' routines.
171         // Also, we've checked that our haystack is big enough to run on the
172         // vector routine. Pointer validity is caller's responsibility.
173         //
174         // Note that we could call `self.0.find_raw` directly here. But that
175         // means we'd have to annotate this routine with `target_feature`.
176         // Which is fine, because this routine is `unsafe` anyway and the
177         // `target_feature` obligation is met by virtue of building a `One`.
178         // The real problem is that a routine with a `target_feature`
179         // annotation generally can't be inlined into caller code unless the
180         // caller code has the same target feature annotations. Which is maybe
181         // okay for SSE2, but we do the same thing for AVX2 where caller code
182         // probably usually doesn't have AVX2 enabled. That means that this
183         // routine can be inlined which will handle some of the short-haystack
184         // cases above without touching the architecture specific code.
185         self.find_raw_impl(start, end)
186     }
187 
188     /// Like `rfind`, but accepts and returns raw pointers.
189     ///
190     /// When a match is found, the pointer returned is guaranteed to be
191     /// `>= start` and `< end`.
192     ///
193     /// This routine is useful if you're already using raw pointers and would
194     /// like to avoid converting back to a slice before executing a search.
195     ///
196     /// # Safety
197     ///
198     /// * Both `start` and `end` must be valid for reads.
199     /// * Both `start` and `end` must point to an initialized value.
200     /// * Both `start` and `end` must point to the same allocated object and
201     /// must either be in bounds or at most one byte past the end of the
202     /// allocated object.
203     /// * Both `start` and `end` must be _derived from_ a pointer to the same
204     /// object.
205     /// * The distance between `start` and `end` must not overflow `isize`.
206     /// * The distance being in bounds must not rely on "wrapping around" the
207     /// address space.
208     ///
209     /// Note that callers may pass a pair of pointers such that `start >= end`.
210     /// In that case, `None` will always be returned.
211     #[inline]
rfind_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>212     pub unsafe fn rfind_raw(
213         &self,
214         start: *const u8,
215         end: *const u8,
216     ) -> Option<*const u8> {
217         if start >= end {
218             return None;
219         }
220         if end.distance(start) < __m128i::BYTES {
221             // SAFETY: We require the caller to pass valid start/end pointers.
222             return generic::rev_byte_by_byte(start, end, |b| {
223                 b == self.0.needle1()
224             });
225         }
226         // SAFETY: Building a `One` means it's safe to call 'sse2' routines.
227         // Also, we've checked that our haystack is big enough to run on the
228         // vector routine. Pointer validity is caller's responsibility.
229         //
230         // See note in forward routine above for why we don't just call
231         // `self.0.rfind_raw` directly here.
232         self.rfind_raw_impl(start, end)
233     }
234 
235     /// Counts all occurrences of this byte in the given haystack represented
236     /// by raw pointers.
237     ///
238     /// This routine is useful if you're already using raw pointers and would
239     /// like to avoid converting back to a slice before executing a search.
240     ///
241     /// # Safety
242     ///
243     /// * Both `start` and `end` must be valid for reads.
244     /// * Both `start` and `end` must point to an initialized value.
245     /// * Both `start` and `end` must point to the same allocated object and
246     /// must either be in bounds or at most one byte past the end of the
247     /// allocated object.
248     /// * Both `start` and `end` must be _derived from_ a pointer to the same
249     /// object.
250     /// * The distance between `start` and `end` must not overflow `isize`.
251     /// * The distance being in bounds must not rely on "wrapping around" the
252     /// address space.
253     ///
254     /// Note that callers may pass a pair of pointers such that `start >= end`.
255     /// In that case, `0` will always be returned.
256     #[inline]
count_raw(&self, start: *const u8, end: *const u8) -> usize257     pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize {
258         if start >= end {
259             return 0;
260         }
261         if end.distance(start) < __m128i::BYTES {
262             // SAFETY: We require the caller to pass valid start/end pointers.
263             return generic::count_byte_by_byte(start, end, |b| {
264                 b == self.0.needle1()
265             });
266         }
267         // SAFETY: Building a `One` means it's safe to call 'sse2' routines.
268         // Also, we've checked that our haystack is big enough to run on the
269         // vector routine. Pointer validity is caller's responsibility.
270         self.count_raw_impl(start, end)
271     }
272 
273     /// Execute a search using SSE2 vectors and routines.
274     ///
275     /// # Safety
276     ///
277     /// Same as [`One::find_raw`], except the distance between `start` and
278     /// `end` must be at least the size of an SSE2 vector (in bytes).
279     ///
280     /// (The target feature safety obligation is automatically fulfilled by
281     /// virtue of being a method on `One`, which can only be constructed
282     /// when it is safe to call `sse2` routines.)
283     #[target_feature(enable = "sse2")]
284     #[inline]
find_raw_impl( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>285     unsafe fn find_raw_impl(
286         &self,
287         start: *const u8,
288         end: *const u8,
289     ) -> Option<*const u8> {
290         self.0.find_raw(start, end)
291     }
292 
293     /// Execute a search using SSE2 vectors and routines.
294     ///
295     /// # Safety
296     ///
297     /// Same as [`One::rfind_raw`], except the distance between `start` and
298     /// `end` must be at least the size of an SSE2 vector (in bytes).
299     ///
300     /// (The target feature safety obligation is automatically fulfilled by
301     /// virtue of being a method on `One`, which can only be constructed
302     /// when it is safe to call `sse2` routines.)
303     #[target_feature(enable = "sse2")]
304     #[inline]
rfind_raw_impl( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>305     unsafe fn rfind_raw_impl(
306         &self,
307         start: *const u8,
308         end: *const u8,
309     ) -> Option<*const u8> {
310         self.0.rfind_raw(start, end)
311     }
312 
313     /// Execute a count using SSE2 vectors and routines.
314     ///
315     /// # Safety
316     ///
317     /// Same as [`One::count_raw`], except the distance between `start` and
318     /// `end` must be at least the size of an SSE2 vector (in bytes).
319     ///
320     /// (The target feature safety obligation is automatically fulfilled by
321     /// virtue of being a method on `One`, which can only be constructed
322     /// when it is safe to call `sse2` routines.)
323     #[target_feature(enable = "sse2")]
324     #[inline]
count_raw_impl( &self, start: *const u8, end: *const u8, ) -> usize325     unsafe fn count_raw_impl(
326         &self,
327         start: *const u8,
328         end: *const u8,
329     ) -> usize {
330         self.0.count_raw(start, end)
331     }
332 
333     /// Returns an iterator over all occurrences of the needle byte in the
334     /// given haystack.
335     ///
336     /// The iterator returned implements `DoubleEndedIterator`. This means it
337     /// can also be used to find occurrences in reverse order.
338     #[inline]
iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h>339     pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> {
340         OneIter { searcher: self, it: generic::Iter::new(haystack) }
341     }
342 }
343 
344 /// An iterator over all occurrences of a single byte in a haystack.
345 ///
346 /// This iterator implements `DoubleEndedIterator`, which means it can also be
347 /// used to find occurrences in reverse order.
348 ///
349 /// This iterator is created by the [`One::iter`] method.
350 ///
351 /// The lifetime parameters are as follows:
352 ///
353 /// * `'a` refers to the lifetime of the underlying [`One`] searcher.
354 /// * `'h` refers to the lifetime of the haystack being searched.
355 #[derive(Clone, Debug)]
356 pub struct OneIter<'a, 'h> {
357     searcher: &'a One,
358     it: generic::Iter<'h>,
359 }
360 
361 impl<'a, 'h> Iterator for OneIter<'a, 'h> {
362     type Item = usize;
363 
364     #[inline]
next(&mut self) -> Option<usize>365     fn next(&mut self) -> Option<usize> {
366         // SAFETY: We rely on the generic iterator to provide valid start
367         // and end pointers, but we guarantee that any pointer returned by
368         // 'find_raw' falls within the bounds of the start and end pointer.
369         unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
370     }
371 
372     #[inline]
count(self) -> usize373     fn count(self) -> usize {
374         self.it.count(|s, e| {
375             // SAFETY: We rely on our generic iterator to return valid start
376             // and end pointers.
377             unsafe { self.searcher.count_raw(s, e) }
378         })
379     }
380 
381     #[inline]
size_hint(&self) -> (usize, Option<usize>)382     fn size_hint(&self) -> (usize, Option<usize>) {
383         self.it.size_hint()
384     }
385 }
386 
387 impl<'a, 'h> DoubleEndedIterator for OneIter<'a, 'h> {
388     #[inline]
next_back(&mut self) -> Option<usize>389     fn next_back(&mut self) -> Option<usize> {
390         // SAFETY: We rely on the generic iterator to provide valid start
391         // and end pointers, but we guarantee that any pointer returned by
392         // 'rfind_raw' falls within the bounds of the start and end pointer.
393         unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
394     }
395 }
396 
397 impl<'a, 'h> core::iter::FusedIterator for OneIter<'a, 'h> {}
398 
399 /// Finds all occurrences of two bytes in a haystack.
400 ///
401 /// That is, this reports matches of one of two possible bytes. For example,
402 /// searching for `a` or `b` in `afoobar` would report matches at offsets `0`,
403 /// `4` and `5`.
404 #[derive(Clone, Copy, Debug)]
405 pub struct Two(generic::Two<__m128i>);
406 
407 impl Two {
408     /// Create a new searcher that finds occurrences of the needle bytes given.
409     ///
410     /// This particular searcher is specialized to use SSE2 vector instructions
411     /// that typically make it quite fast.
412     ///
413     /// If SSE2 is unavailable in the current environment, then `None` is
414     /// returned.
415     #[inline]
new(needle1: u8, needle2: u8) -> Option<Two>416     pub fn new(needle1: u8, needle2: u8) -> Option<Two> {
417         if Two::is_available() {
418             // SAFETY: we check that sse2 is available above.
419             unsafe { Some(Two::new_unchecked(needle1, needle2)) }
420         } else {
421             None
422         }
423     }
424 
425     /// Create a new finder specific to SSE2 vectors and routines without
426     /// checking that SSE2 is available.
427     ///
428     /// # Safety
429     ///
430     /// Callers must guarantee that it is safe to execute `sse2` instructions
431     /// in the current environment.
432     ///
433     /// Note that it is a common misconception that if one compiles for an
434     /// `x86_64` target, then they therefore automatically have access to SSE2
435     /// instructions. While this is almost always the case, it isn't true in
436     /// 100% of cases.
437     #[target_feature(enable = "sse2")]
438     #[inline]
new_unchecked(needle1: u8, needle2: u8) -> Two439     pub unsafe fn new_unchecked(needle1: u8, needle2: u8) -> Two {
440         Two(generic::Two::new(needle1, needle2))
441     }
442 
443     /// Returns true when this implementation is available in the current
444     /// environment.
445     ///
446     /// When this is true, it is guaranteed that [`Two::new`] will return
447     /// a `Some` value. Similarly, when it is false, it is guaranteed that
448     /// `Two::new` will return a `None` value.
449     ///
450     /// Note also that for the lifetime of a single program, if this returns
451     /// true then it will always return true.
452     #[inline]
is_available() -> bool453     pub fn is_available() -> bool {
454         #[cfg(target_feature = "sse2")]
455         {
456             true
457         }
458         #[cfg(not(target_feature = "sse2"))]
459         {
460             false
461         }
462     }
463 
464     /// Return the first occurrence of one of the needle bytes in the given
465     /// haystack. If no such occurrence exists, then `None` is returned.
466     ///
467     /// The occurrence is reported as an offset into `haystack`. Its maximum
468     /// value is `haystack.len() - 1`.
469     #[inline]
find(&self, haystack: &[u8]) -> Option<usize>470     pub fn find(&self, haystack: &[u8]) -> Option<usize> {
471         // SAFETY: `find_raw` guarantees that if a pointer is returned, it
472         // falls within the bounds of the start and end pointers.
473         unsafe {
474             generic::search_slice_with_raw(haystack, |s, e| {
475                 self.find_raw(s, e)
476             })
477         }
478     }
479 
480     /// Return the last occurrence of one of the needle bytes in the given
481     /// haystack. If no such occurrence exists, then `None` is returned.
482     ///
483     /// The occurrence is reported as an offset into `haystack`. Its maximum
484     /// value is `haystack.len() - 1`.
485     #[inline]
rfind(&self, haystack: &[u8]) -> Option<usize>486     pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
487         // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it
488         // falls within the bounds of the start and end pointers.
489         unsafe {
490             generic::search_slice_with_raw(haystack, |s, e| {
491                 self.rfind_raw(s, e)
492             })
493         }
494     }
495 
496     /// Like `find`, but accepts and returns raw pointers.
497     ///
498     /// When a match is found, the pointer returned is guaranteed to be
499     /// `>= start` and `< end`.
500     ///
501     /// This routine is useful if you're already using raw pointers and would
502     /// like to avoid converting back to a slice before executing a search.
503     ///
504     /// # Safety
505     ///
506     /// * Both `start` and `end` must be valid for reads.
507     /// * Both `start` and `end` must point to an initialized value.
508     /// * Both `start` and `end` must point to the same allocated object and
509     /// must either be in bounds or at most one byte past the end of the
510     /// allocated object.
511     /// * Both `start` and `end` must be _derived from_ a pointer to the same
512     /// object.
513     /// * The distance between `start` and `end` must not overflow `isize`.
514     /// * The distance being in bounds must not rely on "wrapping around" the
515     /// address space.
516     ///
517     /// Note that callers may pass a pair of pointers such that `start >= end`.
518     /// In that case, `None` will always be returned.
519     #[inline]
find_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>520     pub unsafe fn find_raw(
521         &self,
522         start: *const u8,
523         end: *const u8,
524     ) -> Option<*const u8> {
525         if start >= end {
526             return None;
527         }
528         if end.distance(start) < __m128i::BYTES {
529             // SAFETY: We require the caller to pass valid start/end pointers.
530             return generic::fwd_byte_by_byte(start, end, |b| {
531                 b == self.0.needle1() || b == self.0.needle2()
532             });
533         }
534         // SAFETY: Building a `Two` means it's safe to call 'sse2' routines.
535         // Also, we've checked that our haystack is big enough to run on the
536         // vector routine. Pointer validity is caller's responsibility.
537         //
538         // Note that we could call `self.0.find_raw` directly here. But that
539         // means we'd have to annotate this routine with `target_feature`.
540         // Which is fine, because this routine is `unsafe` anyway and the
541         // `target_feature` obligation is met by virtue of building a `Two`.
542         // The real problem is that a routine with a `target_feature`
543         // annotation generally can't be inlined into caller code unless the
544         // caller code has the same target feature annotations. Which is maybe
545         // okay for SSE2, but we do the same thing for AVX2 where caller code
546         // probably usually doesn't have AVX2 enabled. That means that this
547         // routine can be inlined which will handle some of the short-haystack
548         // cases above without touching the architecture specific code.
549         self.find_raw_impl(start, end)
550     }
551 
552     /// Like `rfind`, but accepts and returns raw pointers.
553     ///
554     /// When a match is found, the pointer returned is guaranteed to be
555     /// `>= start` and `< end`.
556     ///
557     /// This routine is useful if you're already using raw pointers and would
558     /// like to avoid converting back to a slice before executing a search.
559     ///
560     /// # Safety
561     ///
562     /// * Both `start` and `end` must be valid for reads.
563     /// * Both `start` and `end` must point to an initialized value.
564     /// * Both `start` and `end` must point to the same allocated object and
565     /// must either be in bounds or at most one byte past the end of the
566     /// allocated object.
567     /// * Both `start` and `end` must be _derived from_ a pointer to the same
568     /// object.
569     /// * The distance between `start` and `end` must not overflow `isize`.
570     /// * The distance being in bounds must not rely on "wrapping around" the
571     /// address space.
572     ///
573     /// Note that callers may pass a pair of pointers such that `start >= end`.
574     /// In that case, `None` will always be returned.
575     #[inline]
rfind_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>576     pub unsafe fn rfind_raw(
577         &self,
578         start: *const u8,
579         end: *const u8,
580     ) -> Option<*const u8> {
581         if start >= end {
582             return None;
583         }
584         if end.distance(start) < __m128i::BYTES {
585             // SAFETY: We require the caller to pass valid start/end pointers.
586             return generic::rev_byte_by_byte(start, end, |b| {
587                 b == self.0.needle1() || b == self.0.needle2()
588             });
589         }
590         // SAFETY: Building a `Two` means it's safe to call 'sse2' routines.
591         // Also, we've checked that our haystack is big enough to run on the
592         // vector routine. Pointer validity is caller's responsibility.
593         //
594         // See note in forward routine above for why we don't just call
595         // `self.0.rfind_raw` directly here.
596         self.rfind_raw_impl(start, end)
597     }
598 
599     /// Execute a search using SSE2 vectors and routines.
600     ///
601     /// # Safety
602     ///
603     /// Same as [`Two::find_raw`], except the distance between `start` and
604     /// `end` must be at least the size of an SSE2 vector (in bytes).
605     ///
606     /// (The target feature safety obligation is automatically fulfilled by
607     /// virtue of being a method on `Two`, which can only be constructed
608     /// when it is safe to call `sse2` routines.)
609     #[target_feature(enable = "sse2")]
610     #[inline]
find_raw_impl( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>611     unsafe fn find_raw_impl(
612         &self,
613         start: *const u8,
614         end: *const u8,
615     ) -> Option<*const u8> {
616         self.0.find_raw(start, end)
617     }
618 
619     /// Execute a search using SSE2 vectors and routines.
620     ///
621     /// # Safety
622     ///
623     /// Same as [`Two::rfind_raw`], except the distance between `start` and
624     /// `end` must be at least the size of an SSE2 vector (in bytes).
625     ///
626     /// (The target feature safety obligation is automatically fulfilled by
627     /// virtue of being a method on `Two`, which can only be constructed
628     /// when it is safe to call `sse2` routines.)
629     #[target_feature(enable = "sse2")]
630     #[inline]
rfind_raw_impl( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>631     unsafe fn rfind_raw_impl(
632         &self,
633         start: *const u8,
634         end: *const u8,
635     ) -> Option<*const u8> {
636         self.0.rfind_raw(start, end)
637     }
638 
639     /// Returns an iterator over all occurrences of the needle bytes in the
640     /// given haystack.
641     ///
642     /// The iterator returned implements `DoubleEndedIterator`. This means it
643     /// can also be used to find occurrences in reverse order.
644     #[inline]
iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h>645     pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> {
646         TwoIter { searcher: self, it: generic::Iter::new(haystack) }
647     }
648 }
649 
650 /// An iterator over all occurrences of two possible bytes in a haystack.
651 ///
652 /// This iterator implements `DoubleEndedIterator`, which means it can also be
653 /// used to find occurrences in reverse order.
654 ///
655 /// This iterator is created by the [`Two::iter`] method.
656 ///
657 /// The lifetime parameters are as follows:
658 ///
659 /// * `'a` refers to the lifetime of the underlying [`Two`] searcher.
660 /// * `'h` refers to the lifetime of the haystack being searched.
661 #[derive(Clone, Debug)]
662 pub struct TwoIter<'a, 'h> {
663     searcher: &'a Two,
664     it: generic::Iter<'h>,
665 }
666 
667 impl<'a, 'h> Iterator for TwoIter<'a, 'h> {
668     type Item = usize;
669 
670     #[inline]
next(&mut self) -> Option<usize>671     fn next(&mut self) -> Option<usize> {
672         // SAFETY: We rely on the generic iterator to provide valid start
673         // and end pointers, but we guarantee that any pointer returned by
674         // 'find_raw' falls within the bounds of the start and end pointer.
675         unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
676     }
677 
678     #[inline]
size_hint(&self) -> (usize, Option<usize>)679     fn size_hint(&self) -> (usize, Option<usize>) {
680         self.it.size_hint()
681     }
682 }
683 
684 impl<'a, 'h> DoubleEndedIterator for TwoIter<'a, 'h> {
685     #[inline]
next_back(&mut self) -> Option<usize>686     fn next_back(&mut self) -> Option<usize> {
687         // SAFETY: We rely on the generic iterator to provide valid start
688         // and end pointers, but we guarantee that any pointer returned by
689         // 'rfind_raw' falls within the bounds of the start and end pointer.
690         unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
691     }
692 }
693 
694 impl<'a, 'h> core::iter::FusedIterator for TwoIter<'a, 'h> {}
695 
696 /// Finds all occurrences of three bytes in a haystack.
697 ///
698 /// That is, this reports matches of one of three possible bytes. For example,
699 /// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets
700 /// `0`, `2`, `3`, `4` and `5`.
701 #[derive(Clone, Copy, Debug)]
702 pub struct Three(generic::Three<__m128i>);
703 
704 impl Three {
705     /// Create a new searcher that finds occurrences of the needle bytes given.
706     ///
707     /// This particular searcher is specialized to use SSE2 vector instructions
708     /// that typically make it quite fast.
709     ///
710     /// If SSE2 is unavailable in the current environment, then `None` is
711     /// returned.
712     #[inline]
new(needle1: u8, needle2: u8, needle3: u8) -> Option<Three>713     pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Option<Three> {
714         if Three::is_available() {
715             // SAFETY: we check that sse2 is available above.
716             unsafe { Some(Three::new_unchecked(needle1, needle2, needle3)) }
717         } else {
718             None
719         }
720     }
721 
722     /// Create a new finder specific to SSE2 vectors and routines without
723     /// checking that SSE2 is available.
724     ///
725     /// # Safety
726     ///
727     /// Callers must guarantee that it is safe to execute `sse2` instructions
728     /// in the current environment.
729     ///
730     /// Note that it is a common misconception that if one compiles for an
731     /// `x86_64` target, then they therefore automatically have access to SSE2
732     /// instructions. While this is almost always the case, it isn't true in
733     /// 100% of cases.
734     #[target_feature(enable = "sse2")]
735     #[inline]
new_unchecked( needle1: u8, needle2: u8, needle3: u8, ) -> Three736     pub unsafe fn new_unchecked(
737         needle1: u8,
738         needle2: u8,
739         needle3: u8,
740     ) -> Three {
741         Three(generic::Three::new(needle1, needle2, needle3))
742     }
743 
744     /// Returns true when this implementation is available in the current
745     /// environment.
746     ///
747     /// When this is true, it is guaranteed that [`Three::new`] will return
748     /// a `Some` value. Similarly, when it is false, it is guaranteed that
749     /// `Three::new` will return a `None` value.
750     ///
751     /// Note also that for the lifetime of a single program, if this returns
752     /// true then it will always return true.
753     #[inline]
is_available() -> bool754     pub fn is_available() -> bool {
755         #[cfg(target_feature = "sse2")]
756         {
757             true
758         }
759         #[cfg(not(target_feature = "sse2"))]
760         {
761             false
762         }
763     }
764 
765     /// Return the first occurrence of one of the needle bytes in the given
766     /// haystack. If no such occurrence exists, then `None` is returned.
767     ///
768     /// The occurrence is reported as an offset into `haystack`. Its maximum
769     /// value is `haystack.len() - 1`.
770     #[inline]
find(&self, haystack: &[u8]) -> Option<usize>771     pub fn find(&self, haystack: &[u8]) -> Option<usize> {
772         // SAFETY: `find_raw` guarantees that if a pointer is returned, it
773         // falls within the bounds of the start and end pointers.
774         unsafe {
775             generic::search_slice_with_raw(haystack, |s, e| {
776                 self.find_raw(s, e)
777             })
778         }
779     }
780 
781     /// Return the last occurrence of one of the needle bytes in the given
782     /// haystack. If no such occurrence exists, then `None` is returned.
783     ///
784     /// The occurrence is reported as an offset into `haystack`. Its maximum
785     /// value is `haystack.len() - 1`.
786     #[inline]
rfind(&self, haystack: &[u8]) -> Option<usize>787     pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
788         // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it
789         // falls within the bounds of the start and end pointers.
790         unsafe {
791             generic::search_slice_with_raw(haystack, |s, e| {
792                 self.rfind_raw(s, e)
793             })
794         }
795     }
796 
797     /// Like `find`, but accepts and returns raw pointers.
798     ///
799     /// When a match is found, the pointer returned is guaranteed to be
800     /// `>= start` and `< end`.
801     ///
802     /// This routine is useful if you're already using raw pointers and would
803     /// like to avoid converting back to a slice before executing a search.
804     ///
805     /// # Safety
806     ///
807     /// * Both `start` and `end` must be valid for reads.
808     /// * Both `start` and `end` must point to an initialized value.
809     /// * Both `start` and `end` must point to the same allocated object and
810     /// must either be in bounds or at most one byte past the end of the
811     /// allocated object.
812     /// * Both `start` and `end` must be _derived from_ a pointer to the same
813     /// object.
814     /// * The distance between `start` and `end` must not overflow `isize`.
815     /// * The distance being in bounds must not rely on "wrapping around" the
816     /// address space.
817     ///
818     /// Note that callers may pass a pair of pointers such that `start >= end`.
819     /// In that case, `None` will always be returned.
820     #[inline]
find_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>821     pub unsafe fn find_raw(
822         &self,
823         start: *const u8,
824         end: *const u8,
825     ) -> Option<*const u8> {
826         if start >= end {
827             return None;
828         }
829         if end.distance(start) < __m128i::BYTES {
830             // SAFETY: We require the caller to pass valid start/end pointers.
831             return generic::fwd_byte_by_byte(start, end, |b| {
832                 b == self.0.needle1()
833                     || b == self.0.needle2()
834                     || b == self.0.needle3()
835             });
836         }
837         // SAFETY: Building a `Three` means it's safe to call 'sse2' routines.
838         // Also, we've checked that our haystack is big enough to run on the
839         // vector routine. Pointer validity is caller's responsibility.
840         //
841         // Note that we could call `self.0.find_raw` directly here. But that
842         // means we'd have to annotate this routine with `target_feature`.
843         // Which is fine, because this routine is `unsafe` anyway and the
844         // `target_feature` obligation is met by virtue of building a `Three`.
845         // The real problem is that a routine with a `target_feature`
846         // annotation generally can't be inlined into caller code unless the
847         // caller code has the same target feature annotations. Which is maybe
848         // okay for SSE2, but we do the same thing for AVX2 where caller code
849         // probably usually doesn't have AVX2 enabled. That means that this
850         // routine can be inlined which will handle some of the short-haystack
851         // cases above without touching the architecture specific code.
852         self.find_raw_impl(start, end)
853     }
854 
855     /// Like `rfind`, but accepts and returns raw pointers.
856     ///
857     /// When a match is found, the pointer returned is guaranteed to be
858     /// `>= start` and `< end`.
859     ///
860     /// This routine is useful if you're already using raw pointers and would
861     /// like to avoid converting back to a slice before executing a search.
862     ///
863     /// # Safety
864     ///
865     /// * Both `start` and `end` must be valid for reads.
866     /// * Both `start` and `end` must point to an initialized value.
867     /// * Both `start` and `end` must point to the same allocated object and
868     /// must either be in bounds or at most one byte past the end of the
869     /// allocated object.
870     /// * Both `start` and `end` must be _derived from_ a pointer to the same
871     /// object.
872     /// * The distance between `start` and `end` must not overflow `isize`.
873     /// * The distance being in bounds must not rely on "wrapping around" the
874     /// address space.
875     ///
876     /// Note that callers may pass a pair of pointers such that `start >= end`.
877     /// In that case, `None` will always be returned.
878     #[inline]
rfind_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>879     pub unsafe fn rfind_raw(
880         &self,
881         start: *const u8,
882         end: *const u8,
883     ) -> Option<*const u8> {
884         if start >= end {
885             return None;
886         }
887         if end.distance(start) < __m128i::BYTES {
888             // SAFETY: We require the caller to pass valid start/end pointers.
889             return generic::rev_byte_by_byte(start, end, |b| {
890                 b == self.0.needle1()
891                     || b == self.0.needle2()
892                     || b == self.0.needle3()
893             });
894         }
895         // SAFETY: Building a `Three` means it's safe to call 'sse2' routines.
896         // Also, we've checked that our haystack is big enough to run on the
897         // vector routine. Pointer validity is caller's responsibility.
898         //
899         // See note in forward routine above for why we don't just call
900         // `self.0.rfind_raw` directly here.
901         self.rfind_raw_impl(start, end)
902     }
903 
904     /// Execute a search using SSE2 vectors and routines.
905     ///
906     /// # Safety
907     ///
908     /// Same as [`Three::find_raw`], except the distance between `start` and
909     /// `end` must be at least the size of an SSE2 vector (in bytes).
910     ///
911     /// (The target feature safety obligation is automatically fulfilled by
912     /// virtue of being a method on `Three`, which can only be constructed
913     /// when it is safe to call `sse2` routines.)
914     #[target_feature(enable = "sse2")]
915     #[inline]
find_raw_impl( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>916     unsafe fn find_raw_impl(
917         &self,
918         start: *const u8,
919         end: *const u8,
920     ) -> Option<*const u8> {
921         self.0.find_raw(start, end)
922     }
923 
924     /// Execute a search using SSE2 vectors and routines.
925     ///
926     /// # Safety
927     ///
928     /// Same as [`Three::rfind_raw`], except the distance between `start` and
929     /// `end` must be at least the size of an SSE2 vector (in bytes).
930     ///
931     /// (The target feature safety obligation is automatically fulfilled by
932     /// virtue of being a method on `Three`, which can only be constructed
933     /// when it is safe to call `sse2` routines.)
934     #[target_feature(enable = "sse2")]
935     #[inline]
rfind_raw_impl( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>936     unsafe fn rfind_raw_impl(
937         &self,
938         start: *const u8,
939         end: *const u8,
940     ) -> Option<*const u8> {
941         self.0.rfind_raw(start, end)
942     }
943 
944     /// Returns an iterator over all occurrences of the needle byte in the
945     /// given haystack.
946     ///
947     /// The iterator returned implements `DoubleEndedIterator`. This means it
948     /// can also be used to find occurrences in reverse order.
949     #[inline]
iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h>950     pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> {
951         ThreeIter { searcher: self, it: generic::Iter::new(haystack) }
952     }
953 }
954 
955 /// An iterator over all occurrences of three possible bytes in a haystack.
956 ///
957 /// This iterator implements `DoubleEndedIterator`, which means it can also be
958 /// used to find occurrences in reverse order.
959 ///
960 /// This iterator is created by the [`Three::iter`] method.
961 ///
962 /// The lifetime parameters are as follows:
963 ///
964 /// * `'a` refers to the lifetime of the underlying [`Three`] searcher.
965 /// * `'h` refers to the lifetime of the haystack being searched.
966 #[derive(Clone, Debug)]
967 pub struct ThreeIter<'a, 'h> {
968     searcher: &'a Three,
969     it: generic::Iter<'h>,
970 }
971 
972 impl<'a, 'h> Iterator for ThreeIter<'a, 'h> {
973     type Item = usize;
974 
975     #[inline]
next(&mut self) -> Option<usize>976     fn next(&mut self) -> Option<usize> {
977         // SAFETY: We rely on the generic iterator to provide valid start
978         // and end pointers, but we guarantee that any pointer returned by
979         // 'find_raw' falls within the bounds of the start and end pointer.
980         unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
981     }
982 
983     #[inline]
size_hint(&self) -> (usize, Option<usize>)984     fn size_hint(&self) -> (usize, Option<usize>) {
985         self.it.size_hint()
986     }
987 }
988 
989 impl<'a, 'h> DoubleEndedIterator for ThreeIter<'a, 'h> {
990     #[inline]
next_back(&mut self) -> Option<usize>991     fn next_back(&mut self) -> Option<usize> {
992         // SAFETY: We rely on the generic iterator to provide valid start
993         // and end pointers, but we guarantee that any pointer returned by
994         // 'rfind_raw' falls within the bounds of the start and end pointer.
995         unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
996     }
997 }
998 
999 impl<'a, 'h> core::iter::FusedIterator for ThreeIter<'a, 'h> {}
1000 
1001 #[cfg(test)]
1002 mod tests {
1003     use super::*;
1004 
1005     define_memchr_quickcheck!(super);
1006 
1007     #[test]
forward_one()1008     fn forward_one() {
1009         crate::tests::memchr::Runner::new(1).forward_iter(
1010             |haystack, needles| {
1011                 Some(One::new(needles[0])?.iter(haystack).collect())
1012             },
1013         )
1014     }
1015 
1016     #[test]
reverse_one()1017     fn reverse_one() {
1018         crate::tests::memchr::Runner::new(1).reverse_iter(
1019             |haystack, needles| {
1020                 Some(One::new(needles[0])?.iter(haystack).rev().collect())
1021             },
1022         )
1023     }
1024 
1025     #[test]
count_one()1026     fn count_one() {
1027         crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| {
1028             Some(One::new(needles[0])?.iter(haystack).count())
1029         })
1030     }
1031 
1032     #[test]
forward_two()1033     fn forward_two() {
1034         crate::tests::memchr::Runner::new(2).forward_iter(
1035             |haystack, needles| {
1036                 let n1 = needles.get(0).copied()?;
1037                 let n2 = needles.get(1).copied()?;
1038                 Some(Two::new(n1, n2)?.iter(haystack).collect())
1039             },
1040         )
1041     }
1042 
1043     #[test]
reverse_two()1044     fn reverse_two() {
1045         crate::tests::memchr::Runner::new(2).reverse_iter(
1046             |haystack, needles| {
1047                 let n1 = needles.get(0).copied()?;
1048                 let n2 = needles.get(1).copied()?;
1049                 Some(Two::new(n1, n2)?.iter(haystack).rev().collect())
1050             },
1051         )
1052     }
1053 
1054     #[test]
forward_three()1055     fn forward_three() {
1056         crate::tests::memchr::Runner::new(3).forward_iter(
1057             |haystack, needles| {
1058                 let n1 = needles.get(0).copied()?;
1059                 let n2 = needles.get(1).copied()?;
1060                 let n3 = needles.get(2).copied()?;
1061                 Some(Three::new(n1, n2, n3)?.iter(haystack).collect())
1062             },
1063         )
1064     }
1065 
1066     #[test]
reverse_three()1067     fn reverse_three() {
1068         crate::tests::memchr::Runner::new(3).reverse_iter(
1069             |haystack, needles| {
1070                 let n1 = needles.get(0).copied()?;
1071                 let n2 = needles.get(1).copied()?;
1072                 let n3 = needles.get(2).copied()?;
1073                 Some(Three::new(n1, n2, n3)?.iter(haystack).rev().collect())
1074             },
1075         )
1076     }
1077 }
1078