1 /*!
2 Generic crate-internal routines for the `memchr` family of functions.
3 */
4 
5 // What follows is a vector algorithm generic over the specific vector
6 // type to detect the position of one, two or three needles in a haystack.
7 // From what I know, this is a "classic" algorithm, although I don't
8 // believe it has been published in any peer reviewed journal. I believe
9 // it can be found in places like glibc and Go's standard library. It
10 // appears to be well known and is elaborated on in more detail here:
11 // https://gms.tf/stdfind-and-memchr-optimizations.html
12 //
13 // While the routine below is fairly long and perhaps intimidating, the basic
14 // idea is actually very simple and can be expressed straightforwardly in
15 // pseudo code. The pseudo code below is written for 128 bit vectors, but the
16 // actual code below works for anything that implements the Vector trait.
17 //
18 //     needle = (n1 << 15) | (n1 << 14) | ... | (n1 << 1) | n1
19 //     // Note: shift amount is in bytes
20 //     i = 0
21 //     while i <= haystack.len() - 16:
22 //       // A 16 byte vector. Each byte in chunk corresponds to a byte in
23 //       // the haystack.
24 //       chunk = haystack[i:i+16]
25 //       // Compare bytes in needle with bytes in chunk. The result is a 16
26 //       // byte chunk where each byte is 0xFF if the corresponding bytes
27 //       // in needle and chunk were equal, or 0x00 otherwise.
28 //       eqs = cmpeq(needle, chunk)
29 //       // Return a 32 bit integer where the most significant 16 bits
30 //       // are always 0 and the lower 16 bits correspond to whether the
31 //       // most significant bit in the corresponding byte in `eqs` is set.
32 //       // In other words, `mask as u16` has bit i set if and only if
33 //       // needle[i] == chunk[i].
34 //       mask = movemask(eqs)
35 //
36 //       // Mask is 0 if there is no match, and non-zero otherwise.
37 //       if mask != 0:
38 //         // trailing_zeros tells us the position of the least significant
39 //         // bit that is set.
40 //         return i + trailing_zeros(mask)
41 //       i += 16
42 //     // haystack length may not be a multiple of 16, so search the rest.
43 //     while i < haystack.len():
44 //       if haystack[i] == n1:
45 //         return i
46 //       i += 1
47 //     // No match found.
48 //     return NULL
49 //
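// As a concrete (if simplified) illustration, the pseudo code above maps
// almost directly onto SSE2 intrinsics. The sketch below is not part of this
// module, leaves out all of the alignment and unrolling tricks described
// next, and assumes x86_64 (where SSE2 is always available); it works for a
// haystack of any length because of the scalar tail loop:
//
//     use core::arch::x86_64::*;
//
//     unsafe fn memchr_sse2_sketch(n1: u8, haystack: &[u8]) -> Option<usize> {
//         let needle = _mm_set1_epi8(n1 as i8);
//         let mut i = 0;
//         while i + 16 <= haystack.len() {
//             let chunk = _mm_loadu_si128(haystack.as_ptr().add(i).cast());
//             let eqs = _mm_cmpeq_epi8(needle, chunk);
//             let mask = _mm_movemask_epi8(eqs);
//             if mask != 0 {
//                 return Some(i + mask.trailing_zeros() as usize);
//             }
//             i += 16;
//         }
//         // Check any remaining tail bytes one at a time.
//         while i < haystack.len() {
//             if haystack[i] == n1 {
//                 return Some(i);
//             }
//             i += 1;
//         }
//         None
//     }
//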
50 // In fact, we could loosely translate the above code to Rust line-for-line
51 // and it would be a pretty fast algorithm. But we pull out all the stops
52 // to go as fast as possible:
53 //
54 // 1. We use aligned loads. That is, we do some finagling to make sure our
55 //    primary loop not only proceeds in increments of 16 bytes, but that
56 //    the address of haystack's pointer that we dereference is aligned to
57 //    16 bytes. 16 is a magic number here because it is the size of an SSE2
58 //    128-bit vector. (For the AVX2 algorithm, 32 is the magic number.)
59 //    Therefore, to get aligned loads, our pointer's address must be evenly
60 //    divisible by 16. (A worked example follows this list.)
61 // 2. Our primary loop proceeds 64 bytes at a time instead of 16. It's
62 //    kind of like loop unrolling, but we combine the equality comparisons
63 //    using a vector OR such that we only need to extract a single mask to
64 //    determine whether a match exists or not. If so, then we do some
65 //    book-keeping to determine the precise location but otherwise mush on.
66 // 3. We use our "chunk" comparison routine in as many places as possible,
67 //    even if it means using unaligned loads. In particular, if haystack
68 //    starts with an unaligned address, then we do an unaligned load to
69 //    search the first 16 bytes. We then start our primary loop at the
70 //    smallest subsequent aligned address, which will actually overlap with
71 //    previously searched bytes. But we're OK with that. We do a similar
72 //    dance at the end of our primary loop. Finally, to avoid a
73 //    byte-at-a-time loop at the end, we do a final 16 byte unaligned load
74 //    that may overlap with a previous load. This is OK because it converts
75 //    a loop into a small number of very fast vector instructions. The overlap
76 //    is OK because we know the place where the overlap occurs does not
77 //    contain a match.
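//
//    As a worked example of points 1 and 3 above, suppose V is a 16 byte
//    vector and the haystack occupies addresses [1003, 1077) (length 74).
//    The initial unaligned load covers [1003, 1019). The main loop then
//    begins at the next 16-byte aligned address, 1008, so its single 64 byte
//    iteration covers [1008, 1072) and overlaps the 11 bytes we already
//    searched. Fewer than 16 bytes remain after that, so the last step backs
//    the pointer up and finishes with one unaligned load of [1061, 1077),
//    again overlapping bytes that are known not to contain a match.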
78 //
79 // And that's pretty much all there is to it. Note that since the below is
80 // generic and since it's meant to be inlined into routines with a
81 // `#[target_feature(enable = "...")]` annotation, we must mark all routines as
82 // both unsafe and `#[inline(always)]`.
83 //
84 // The fact that the code below is generic does somewhat inhibit us. For
85 // example, I've noticed that introducing an uninlineable `#[cold]` function to
86 // handle the match case in the loop generates tighter assembly, but there is
87 // no way to do this in the generic code below because the generic code doesn't
88 // know what `target_feature` annotation to apply to the uninlineable function.
89 // We could make such functions part of the `Vector` trait, but we instead live
90 // with the slightly sub-optimal codegen for now since it doesn't seem to have
91 // a noticeable perf difference.
92 
93 use crate::{
94     ext::Pointer,
95     vector::{MoveMask, Vector},
96 };
97 
98 /// Finds all occurrences of a single byte in a haystack.
99 #[derive(Clone, Copy, Debug)]
100 pub(crate) struct One<V> {
101     s1: u8,
102     v1: V,
103 }
104 
105 impl<V: Vector> One<V> {
106     /// The number of bytes we examine in each iteration of our search loop.
107     const LOOP_SIZE: usize = 4 * V::BYTES;
108 
109     /// Create a new searcher that finds occurrences of the byte given.
110     #[inline(always)]
111     pub(crate) unsafe fn new(needle: u8) -> One<V> {
112         One { s1: needle, v1: V::splat(needle) }
113     }
114 
115     /// Returns the needle given to `One::new`.
116     #[inline(always)]
117     pub(crate) fn needle1(&self) -> u8 {
118         self.s1
119     }
120 
121     /// Return a pointer to the first occurrence of the needle in the given
122     /// haystack. If no such occurrence exists, then `None` is returned.
123     ///
124     /// When a match is found, the pointer returned is guaranteed to be
125     /// `>= start` and `< end`.
126     ///
127     /// # Safety
128     ///
129     /// * It must be the case that `start < end` and that the distance between
130     /// them is at least equal to `V::BYTES`. That is, it must always be valid
131     /// to do at least an unaligned load of `V` at `start`.
132     /// * Both `start` and `end` must be valid for reads.
133     /// * Both `start` and `end` must point to an initialized value.
134     /// * Both `start` and `end` must point to the same allocated object and
135     /// must either be in bounds or at most one byte past the end of the
136     /// allocated object.
137     /// * Both `start` and `end` must be _derived from_ a pointer to the same
138     /// object.
139     /// * The distance between `start` and `end` must not overflow `isize`.
140     /// * The distance being in bounds must not rely on "wrapping around" the
141     /// address space.
142     #[inline(always)]
143     pub(crate) unsafe fn find_raw(
144         &self,
145         start: *const u8,
146         end: *const u8,
147     ) -> Option<*const u8> {
148         // If we want to support vectors bigger than 256 bits, we probably
149         // need to move up to using a u64 for the masks used below. Currently
150         // they are 32 bits, which means we're SOL for vectors that need masks
151         // bigger than 32 bits. Overall unclear until there's a use case.
152         debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");
153 
154         let topos = V::Mask::first_offset;
155         let len = end.distance(start);
156         debug_assert!(
157             len >= V::BYTES,
158             "haystack has length {}, but must be at least {}",
159             len,
160             V::BYTES
161         );
162 
163         // Search a possibly unaligned chunk at `start`. This covers any part
164         // of the haystack prior to where aligned loads can start.
165         if let Some(cur) = self.search_chunk(start, topos) {
166             return Some(cur);
167         }
168         // Set `cur` to the first V-aligned pointer greater than `start`.
169         let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN));
170         debug_assert!(cur > start && end.sub(V::BYTES) >= start);
171         if len >= Self::LOOP_SIZE {
172             while cur <= end.sub(Self::LOOP_SIZE) {
173                 debug_assert_eq!(0, cur.as_usize() % V::BYTES);
174 
175                 let a = V::load_aligned(cur);
176                 let b = V::load_aligned(cur.add(1 * V::BYTES));
177                 let c = V::load_aligned(cur.add(2 * V::BYTES));
178                 let d = V::load_aligned(cur.add(3 * V::BYTES));
179                 let eqa = self.v1.cmpeq(a);
180                 let eqb = self.v1.cmpeq(b);
181                 let eqc = self.v1.cmpeq(c);
182                 let eqd = self.v1.cmpeq(d);
183                 let or1 = eqa.or(eqb);
184                 let or2 = eqc.or(eqd);
185                 let or3 = or1.or(or2);
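                // `or3` is the OR of all four comparison results, so its
                // movemask is non-zero if and only if at least one of the
                // four chunks contains a match. Only in that case do we pay
                // for the individual movemasks below to pin down where.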
186                 if or3.movemask_will_have_non_zero() {
187                     let mask = eqa.movemask();
188                     if mask.has_non_zero() {
189                         return Some(cur.add(topos(mask)));
190                     }
191 
192                     let mask = eqb.movemask();
193                     if mask.has_non_zero() {
194                         return Some(cur.add(1 * V::BYTES).add(topos(mask)));
195                     }
196 
197                     let mask = eqc.movemask();
198                     if mask.has_non_zero() {
199                         return Some(cur.add(2 * V::BYTES).add(topos(mask)));
200                     }
201 
202                     let mask = eqd.movemask();
203                     debug_assert!(mask.has_non_zero());
204                     return Some(cur.add(3 * V::BYTES).add(topos(mask)));
205                 }
206                 cur = cur.add(Self::LOOP_SIZE);
207             }
208         }
209         // Handle any leftovers after the aligned loop above. We use unaligned
210         // loads here, but I believe we are guaranteed that they are aligned
211         // since `cur` is aligned.
212         while cur <= end.sub(V::BYTES) {
213             debug_assert!(end.distance(cur) >= V::BYTES);
214             if let Some(cur) = self.search_chunk(cur, topos) {
215                 return Some(cur);
216             }
217             cur = cur.add(V::BYTES);
218         }
219         // Finally handle any remaining bytes less than the size of V. In this
220         // case, our pointer may indeed be unaligned and the load may overlap
221         // with the previous one. But that's okay since we know the previous
222         // load didn't lead to a match (otherwise we wouldn't be here).
223         if cur < end {
224             debug_assert!(end.distance(cur) < V::BYTES);
225             cur = cur.sub(V::BYTES - end.distance(cur));
226             debug_assert_eq!(end.distance(cur), V::BYTES);
227             return self.search_chunk(cur, topos);
228         }
229         None
230     }
231 
232     /// Return a pointer to the last occurrence of the needle in the given
233     /// haystack. If no such occurrence exists, then `None` is returned.
234     ///
235     /// When a match is found, the pointer returned is guaranteed to be
236     /// `>= start` and `< end`.
237     ///
238     /// # Safety
239     ///
240     /// * It must be the case that `start < end` and that the distance between
241     /// them is at least equal to `V::BYTES`. That is, it must always be valid
242     /// to do at least an unaligned load of `V` at `start`.
243     /// * Both `start` and `end` must be valid for reads.
244     /// * Both `start` and `end` must point to an initialized value.
245     /// * Both `start` and `end` must point to the same allocated object and
246     /// must either be in bounds or at most one byte past the end of the
247     /// allocated object.
248     /// * Both `start` and `end` must be _derived from_ a pointer to the same
249     /// object.
250     /// * The distance between `start` and `end` must not overflow `isize`.
251     /// * The distance being in bounds must not rely on "wrapping around" the
252     /// address space.
253     #[inline(always)]
254     pub(crate) unsafe fn rfind_raw(
255         &self,
256         start: *const u8,
257         end: *const u8,
258     ) -> Option<*const u8> {
259         // If we want to support vectors bigger than 256 bits, we probably
260         // need to move up to using a u64 for the masks used below. Currently
261         // they are 32 bits, which means we're SOL for vectors that need masks
262         // bigger than 32 bits. Overall unclear until there's a use case.
263         debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");
264 
265         let topos = V::Mask::last_offset;
266         let len = end.distance(start);
267         debug_assert!(
268             len >= V::BYTES,
269             "haystack has length {}, but must be at least {}",
270             len,
271             V::BYTES
272         );
273 
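        // Search a possibly unaligned chunk covering the last V::BYTES of
        // the haystack. This mirrors the unaligned load done at `start` in
        // the forward search.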
274         if let Some(cur) = self.search_chunk(end.sub(V::BYTES), topos) {
275             return Some(cur);
276         }
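        // Set `cur` to the last V-aligned pointer less than or equal to
        // `end`.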
277         let mut cur = end.sub(end.as_usize() & V::ALIGN);
278         debug_assert!(start <= cur && cur <= end);
279         if len >= Self::LOOP_SIZE {
280             while cur >= start.add(Self::LOOP_SIZE) {
281                 debug_assert_eq!(0, cur.as_usize() % V::BYTES);
282 
283                 cur = cur.sub(Self::LOOP_SIZE);
284                 let a = V::load_aligned(cur);
285                 let b = V::load_aligned(cur.add(1 * V::BYTES));
286                 let c = V::load_aligned(cur.add(2 * V::BYTES));
287                 let d = V::load_aligned(cur.add(3 * V::BYTES));
288                 let eqa = self.v1.cmpeq(a);
289                 let eqb = self.v1.cmpeq(b);
290                 let eqc = self.v1.cmpeq(c);
291                 let eqd = self.v1.cmpeq(d);
292                 let or1 = eqa.or(eqb);
293                 let or2 = eqc.or(eqd);
294                 let or3 = or1.or(or2);
295                 if or3.movemask_will_have_non_zero() {
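                    // We're searching in reverse, so examine the chunks from
                    // last (d) to first (a) in order to report the match
                    // closest to `end`.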
296                     let mask = eqd.movemask();
297                     if mask.has_non_zero() {
298                         return Some(cur.add(3 * V::BYTES).add(topos(mask)));
299                     }
300 
301                     let mask = eqc.movemask();
302                     if mask.has_non_zero() {
303                         return Some(cur.add(2 * V::BYTES).add(topos(mask)));
304                     }
305 
306                     let mask = eqb.movemask();
307                     if mask.has_non_zero() {
308                         return Some(cur.add(1 * V::BYTES).add(topos(mask)));
309                     }
310 
311                     let mask = eqa.movemask();
312                     debug_assert!(mask.has_non_zero());
313                     return Some(cur.add(topos(mask)));
314                 }
315             }
316         }
317         while cur >= start.add(V::BYTES) {
318             debug_assert!(cur.distance(start) >= V::BYTES);
319             cur = cur.sub(V::BYTES);
320             if let Some(cur) = self.search_chunk(cur, topos) {
321                 return Some(cur);
322             }
323         }
324         if cur > start {
325             debug_assert!(cur.distance(start) < V::BYTES);
326             return self.search_chunk(start, topos);
327         }
328         None
329     }
330 
331     /// Return a count of all matching bytes in the given haystack.
332     ///
333     /// # Safety
334     ///
335     /// * It must be the case that `start < end` and that the distance between
336     /// them is at least equal to `V::BYTES`. That is, it must always be valid
337     /// to do at least an unaligned load of `V` at `start`.
338     /// * Both `start` and `end` must be valid for reads.
339     /// * Both `start` and `end` must point to an initialized value.
340     /// * Both `start` and `end` must point to the same allocated object and
341     /// must either be in bounds or at most one byte past the end of the
342     /// allocated object.
343     /// * Both `start` and `end` must be _derived from_ a pointer to the same
344     /// object.
345     /// * The distance between `start` and `end` must not overflow `isize`.
346     /// * The distance being in bounds must not rely on "wrapping around" the
347     /// address space.
348     #[inline(always)]
349     pub(crate) unsafe fn count_raw(
350         &self,
351         start: *const u8,
352         end: *const u8,
353     ) -> usize {
354         debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");
355 
356         let confirm = |b| b == self.needle1();
357         let len = end.distance(start);
358         debug_assert!(
359             len >= V::BYTES,
360             "haystack has length {}, but must be at least {}",
361             len,
362             V::BYTES
363         );
364 
365         // Set `cur` to the first V-aligned pointer greater than `start`.
366         let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN));
367         // Count any matching bytes before we start our aligned loop.
368         let mut count = count_byte_by_byte(start, cur, confirm);
369         debug_assert!(cur > start && end.sub(V::BYTES) >= start);
370         if len >= Self::LOOP_SIZE {
371             while cur <= end.sub(Self::LOOP_SIZE) {
372                 debug_assert_eq!(0, cur.as_usize() % V::BYTES);
373 
374                 let a = V::load_aligned(cur);
375                 let b = V::load_aligned(cur.add(1 * V::BYTES));
376                 let c = V::load_aligned(cur.add(2 * V::BYTES));
377                 let d = V::load_aligned(cur.add(3 * V::BYTES));
378                 let eqa = self.v1.cmpeq(a);
379                 let eqb = self.v1.cmpeq(b);
380                 let eqc = self.v1.cmpeq(c);
381                 let eqd = self.v1.cmpeq(d);
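                // Each set bit in a movemask corresponds to exactly one
                // matching byte in its chunk, so popcounting each mask and
                // summing the results counts the matches in this iteration.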
382                 count += eqa.movemask().count_ones();
383                 count += eqb.movemask().count_ones();
384                 count += eqc.movemask().count_ones();
385                 count += eqd.movemask().count_ones();
386                 cur = cur.add(Self::LOOP_SIZE);
387             }
388         }
389         // Handle any leftovers after the aligned loop above. We use unaligned
390         // loads here, but I believe we are guaranteed that they are aligned
391         // since `cur` is aligned.
392         while cur <= end.sub(V::BYTES) {
393             debug_assert!(end.distance(cur) >= V::BYTES);
394             let chunk = V::load_unaligned(cur);
395             count += self.v1.cmpeq(chunk).movemask().count_ones();
396             cur = cur.add(V::BYTES);
397         }
398         // And finally count any leftovers that weren't caught above.
399         count += count_byte_by_byte(cur, end, confirm);
400         count
401     }
402 
403     /// Search `V::BYTES` starting at `cur` via an unaligned load.
404     ///
405     /// `mask_to_offset` should be a function that converts a `movemask` to
406     /// an offset such that `cur.add(offset)` corresponds to a pointer to the
407     /// match location if one is found. Generally it is expected to use either
408     /// `V::Mask::first_offset` or `V::Mask::last_offset`, depending on whether
409     /// one is implementing a forward or reverse search, respectively.
410     ///
411     /// # Safety
412     ///
413     /// `cur` must be a valid pointer and it must be valid to do an unaligned
414     /// load of size `V::BYTES` at `cur`.
415     #[inline(always)]
416     unsafe fn search_chunk(
417         &self,
418         cur: *const u8,
419         mask_to_offset: impl Fn(V::Mask) -> usize,
420     ) -> Option<*const u8> {
421         let chunk = V::load_unaligned(cur);
422         let mask = self.v1.cmpeq(chunk).movemask();
423         if mask.has_non_zero() {
424             Some(cur.add(mask_to_offset(mask)))
425         } else {
426             None
427         }
428     }
429 }
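
// For illustration only: a concrete arch-specific wrapper is expected to
// drive `One` through `search_slice_with_raw` (defined at the bottom of this
// file), which converts a matched pointer back into an offset. A hedged
// sketch of that pattern, where `SomeVector` stands in for a real `Vector`
// implementation:
//
//     fn find(searcher: &One<SomeVector>, haystack: &[u8]) -> Option<usize> {
//         if haystack.len() < SomeVector::BYTES {
//             // `find_raw` requires at least `V::BYTES` of haystack, so a
//             // real caller falls back to a scalar search here.
//             return haystack.iter().position(|&b| b == searcher.needle1());
//         }
//         // SAFETY: the haystack is at least `V::BYTES` long and both
//         // pointers come straight from a valid `&[u8]`.
//         unsafe {
//             search_slice_with_raw(haystack, |s, e| searcher.find_raw(s, e))
//         }
//     }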
430 
431 /// Finds all occurrences of two bytes in a haystack.
432 ///
433 /// That is, this reports matches of one of two possible bytes. For example,
434 /// searching for `a` or `b` in `afoobar` would report matches at offsets `0`,
435 /// `4` and `5`.
436 #[derive(Clone, Copy, Debug)]
437 pub(crate) struct Two<V> {
438     s1: u8,
439     s2: u8,
440     v1: V,
441     v2: V,
442 }
443 
444 impl<V: Vector> Two<V> {
445     /// The number of bytes we examine in each iteration of our search loop.
446     const LOOP_SIZE: usize = 2 * V::BYTES;
447 
448     /// Create a new searcher that finds occurrences of the two bytes given.
449     #[inline(always)]
450     pub(crate) unsafe fn new(needle1: u8, needle2: u8) -> Two<V> {
451         Two {
452             s1: needle1,
453             s2: needle2,
454             v1: V::splat(needle1),
455             v2: V::splat(needle2),
456         }
457     }
458 
459     /// Returns the first needle given to `Two::new`.
460     #[inline(always)]
461     pub(crate) fn needle1(&self) -> u8 {
462         self.s1
463     }
464 
465     /// Returns the second needle given to `Two::new`.
466     #[inline(always)]
467     pub(crate) fn needle2(&self) -> u8 {
468         self.s2
469     }
470 
471     /// Return a pointer to the first occurrence of one of the needles in the
472     /// given haystack. If no such occurrence exists, then `None` is returned.
473     ///
474     /// When a match is found, the pointer returned is guaranteed to be
475     /// `>= start` and `< end`.
476     ///
477     /// # Safety
478     ///
479     /// * It must be the case that `start < end` and that the distance between
480     /// them is at least equal to `V::BYTES`. That is, it must always be valid
481     /// to do at least an unaligned load of `V` at `start`.
482     /// * Both `start` and `end` must be valid for reads.
483     /// * Both `start` and `end` must point to an initialized value.
484     /// * Both `start` and `end` must point to the same allocated object and
485     /// must either be in bounds or at most one byte past the end of the
486     /// allocated object.
487     /// * Both `start` and `end` must be _derived from_ a pointer to the same
488     /// object.
489     /// * The distance between `start` and `end` must not overflow `isize`.
490     /// * The distance being in bounds must not rely on "wrapping around" the
491     /// address space.
492     #[inline(always)]
493     pub(crate) unsafe fn find_raw(
494         &self,
495         start: *const u8,
496         end: *const u8,
497     ) -> Option<*const u8> {
498         // If we want to support vectors bigger than 256 bits, we probably
499         // need to move up to using a u64 for the masks used below. Currently
500         // they are 32 bits, which means we're SOL for vectors that need masks
501         // bigger than 32 bits. Overall unclear until there's a use case.
502         debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");
503 
504         let topos = V::Mask::first_offset;
505         let len = end.distance(start);
506         debug_assert!(
507             len >= V::BYTES,
508             "haystack has length {}, but must be at least {}",
509             len,
510             V::BYTES
511         );
512 
513         // Search a possibly unaligned chunk at `start`. This covers any part
514         // of the haystack prior to where aligned loads can start.
515         if let Some(cur) = self.search_chunk(start, topos) {
516             return Some(cur);
517         }
518         // Set `cur` to the first V-aligned pointer greater than `start`.
519         let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN));
520         debug_assert!(cur > start && end.sub(V::BYTES) >= start);
521         if len >= Self::LOOP_SIZE {
522             while cur <= end.sub(Self::LOOP_SIZE) {
523                 debug_assert_eq!(0, cur.as_usize() % V::BYTES);
524 
525                 let a = V::load_aligned(cur);
526                 let b = V::load_aligned(cur.add(V::BYTES));
527                 let eqa1 = self.v1.cmpeq(a);
528                 let eqb1 = self.v1.cmpeq(b);
529                 let eqa2 = self.v2.cmpeq(a);
530                 let eqb2 = self.v2.cmpeq(b);
531                 let or1 = eqa1.or(eqb1);
532                 let or2 = eqa2.or(eqb2);
533                 let or3 = or1.or(or2);
534                 if or3.movemask_will_have_non_zero() {
535                     let mask = eqa1.movemask().or(eqa2.movemask());
536                     if mask.has_non_zero() {
537                         return Some(cur.add(topos(mask)));
538                     }
539 
540                     let mask = eqb1.movemask().or(eqb2.movemask());
541                     debug_assert!(mask.has_non_zero());
542                     return Some(cur.add(V::BYTES).add(topos(mask)));
543                 }
544                 cur = cur.add(Self::LOOP_SIZE);
545             }
546         }
547         // Handle any leftovers after the aligned loop above. We use unaligned
548         // loads here, but I believe we are guaranteed that they are aligned
549         // since `cur` is aligned.
550         while cur <= end.sub(V::BYTES) {
551             debug_assert!(end.distance(cur) >= V::BYTES);
552             if let Some(cur) = self.search_chunk(cur, topos) {
553                 return Some(cur);
554             }
555             cur = cur.add(V::BYTES);
556         }
557         // Finally handle any remaining bytes less than the size of V. In this
558         // case, our pointer may indeed be unaligned and the load may overlap
559         // with the previous one. But that's okay since we know the previous
560         // load didn't lead to a match (otherwise we wouldn't be here).
561         if cur < end {
562             debug_assert!(end.distance(cur) < V::BYTES);
563             cur = cur.sub(V::BYTES - end.distance(cur));
564             debug_assert_eq!(end.distance(cur), V::BYTES);
565             return self.search_chunk(cur, topos);
566         }
567         None
568     }
569 
570     /// Return a pointer to the last occurrence of one of the needles in the
571     /// haystack. If no such occurrence exists, then `None` is returned.
572     ///
573     /// When a match is found, the pointer returned is guaranteed to be
574     /// `>= start` and `< end`.
575     ///
576     /// # Safety
577     ///
578     /// * It must be the case that `start < end` and that the distance between
579     /// them is at least equal to `V::BYTES`. That is, it must always be valid
580     /// to do at least an unaligned load of `V` at `start`.
581     /// * Both `start` and `end` must be valid for reads.
582     /// * Both `start` and `end` must point to an initialized value.
583     /// * Both `start` and `end` must point to the same allocated object and
584     /// must either be in bounds or at most one byte past the end of the
585     /// allocated object.
586     /// * Both `start` and `end` must be _derived from_ a pointer to the same
587     /// object.
588     /// * The distance between `start` and `end` must not overflow `isize`.
589     /// * The distance being in bounds must not rely on "wrapping around" the
590     /// address space.
591     #[inline(always)]
592     pub(crate) unsafe fn rfind_raw(
593         &self,
594         start: *const u8,
595         end: *const u8,
596     ) -> Option<*const u8> {
597         // If we want to support vectors bigger than 256 bits, we probably
598         // need to move up to using a u64 for the masks used below. Currently
599         // they are 32 bits, which means we're SOL for vectors that need masks
600         // bigger than 32 bits. Overall unclear until there's a use case.
601         debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");
602 
603         let topos = V::Mask::last_offset;
604         let len = end.distance(start);
605         debug_assert!(
606             len >= V::BYTES,
607             "haystack has length {}, but must be at least {}",
608             len,
609             V::BYTES
610         );
611 
612         if let Some(cur) = self.search_chunk(end.sub(V::BYTES), topos) {
613             return Some(cur);
614         }
615         let mut cur = end.sub(end.as_usize() & V::ALIGN);
616         debug_assert!(start <= cur && cur <= end);
617         if len >= Self::LOOP_SIZE {
618             while cur >= start.add(Self::LOOP_SIZE) {
619                 debug_assert_eq!(0, cur.as_usize() % V::BYTES);
620 
621                 cur = cur.sub(Self::LOOP_SIZE);
622                 let a = V::load_aligned(cur);
623                 let b = V::load_aligned(cur.add(V::BYTES));
624                 let eqa1 = self.v1.cmpeq(a);
625                 let eqb1 = self.v1.cmpeq(b);
626                 let eqa2 = self.v2.cmpeq(a);
627                 let eqb2 = self.v2.cmpeq(b);
628                 let or1 = eqa1.or(eqb1);
629                 let or2 = eqa2.or(eqb2);
630                 let or3 = or1.or(or2);
631                 if or3.movemask_will_have_non_zero() {
632                     let mask = eqb1.movemask().or(eqb2.movemask());
633                     if mask.has_non_zero() {
634                         return Some(cur.add(V::BYTES).add(topos(mask)));
635                     }
636 
637                     let mask = eqa1.movemask().or(eqa2.movemask());
638                     debug_assert!(mask.has_non_zero());
639                     return Some(cur.add(topos(mask)));
640                 }
641             }
642         }
643         while cur >= start.add(V::BYTES) {
644             debug_assert!(cur.distance(start) >= V::BYTES);
645             cur = cur.sub(V::BYTES);
646             if let Some(cur) = self.search_chunk(cur, topos) {
647                 return Some(cur);
648             }
649         }
650         if cur > start {
651             debug_assert!(cur.distance(start) < V::BYTES);
652             return self.search_chunk(start, topos);
653         }
654         None
655     }
656 
657     /// Search `V::BYTES` starting at `cur` via an unaligned load.
658     ///
659     /// `mask_to_offset` should be a function that converts a `movemask` to
660     /// an offset such that `cur.add(offset)` corresponds to a pointer to the
661     /// match location if one is found. Generally it is expected to use either
662     /// `V::Mask::first_offset` or `V::Mask::last_offset`, depending on whether
663     /// one is implementing a forward or reverse search, respectively.
664     ///
665     /// # Safety
666     ///
667     /// `cur` must be a valid pointer and it must be valid to do an unaligned
668     /// load of size `V::BYTES` at `cur`.
669     #[inline(always)]
670     unsafe fn search_chunk(
671         &self,
672         cur: *const u8,
673         mask_to_offset: impl Fn(V::Mask) -> usize,
674     ) -> Option<*const u8> {
675         let chunk = V::load_unaligned(cur);
676         let eq1 = self.v1.cmpeq(chunk);
677         let eq2 = self.v2.cmpeq(chunk);
678         let mask = eq1.or(eq2).movemask();
679         if mask.has_non_zero() {
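            // The reported offset comes from the union of the two needles'
            // match masks, so whichever needle matches closest to the
            // relevant end of this chunk is the one that gets reported.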
680             let mask1 = eq1.movemask();
681             let mask2 = eq2.movemask();
682             Some(cur.add(mask_to_offset(mask1.or(mask2))))
683         } else {
684             None
685         }
686     }
687 }
688 
689 /// Finds all occurrences of three bytes in a haystack.
690 ///
691 /// That is, this reports matches of one of three possible bytes. For example,
692 /// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets
693 /// `0`, `2`, `3`, `4` and `5`.
694 #[derive(Clone, Copy, Debug)]
695 pub(crate) struct Three<V> {
696     s1: u8,
697     s2: u8,
698     s3: u8,
699     v1: V,
700     v2: V,
701     v3: V,
702 }
703 
704 impl<V: Vector> Three<V> {
705     /// The number of bytes we examine in each iteration of our search loop.
706     const LOOP_SIZE: usize = 2 * V::BYTES;
707 
708     /// Create a new searcher that finds occurrences of the three bytes given.
709     #[inline(always)]
710     pub(crate) unsafe fn new(
711         needle1: u8,
712         needle2: u8,
713         needle3: u8,
714     ) -> Three<V> {
715         Three {
716             s1: needle1,
717             s2: needle2,
718             s3: needle3,
719             v1: V::splat(needle1),
720             v2: V::splat(needle2),
721             v3: V::splat(needle3),
722         }
723     }
724 
725     /// Returns the first needle given to `Three::new`.
726     #[inline(always)]
727     pub(crate) fn needle1(&self) -> u8 {
728         self.s1
729     }
730 
731     /// Returns the second needle given to `Three::new`.
732     #[inline(always)]
733     pub(crate) fn needle2(&self) -> u8 {
734         self.s2
735     }
736 
737     /// Returns the third needle given to `Three::new`.
738     #[inline(always)]
739     pub(crate) fn needle3(&self) -> u8 {
740         self.s3
741     }
742 
743     /// Return a pointer to the first occurrence of one of the needles in the
744     /// given haystack. If no such occurrence exists, then `None` is returned.
745     ///
746     /// When a match is found, the pointer returned is guaranteed to be
747     /// `>= start` and `< end`.
748     ///
749     /// # Safety
750     ///
751     /// * It must be the case that `start < end` and that the distance between
752     /// them is at least equal to `V::BYTES`. That is, it must always be valid
753     /// to do at least an unaligned load of `V` at `start`.
754     /// * Both `start` and `end` must be valid for reads.
755     /// * Both `start` and `end` must point to an initialized value.
756     /// * Both `start` and `end` must point to the same allocated object and
757     /// must either be in bounds or at most one byte past the end of the
758     /// allocated object.
759     /// * Both `start` and `end` must be _derived from_ a pointer to the same
760     /// object.
761     /// * The distance between `start` and `end` must not overflow `isize`.
762     /// * The distance being in bounds must not rely on "wrapping around" the
763     /// address space.
764     #[inline(always)]
765     pub(crate) unsafe fn find_raw(
766         &self,
767         start: *const u8,
768         end: *const u8,
769     ) -> Option<*const u8> {
770         // If we want to support vectors bigger than 256 bits, we probably
771         // need to move up to using a u64 for the masks used below. Currently
772         // they are 32 bits, which means we're SOL for vectors that need masks
773         // bigger than 32 bits. Overall unclear until there's a use case.
774         debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");
775 
776         let topos = V::Mask::first_offset;
777         let len = end.distance(start);
778         debug_assert!(
779             len >= V::BYTES,
780             "haystack has length {}, but must be at least {}",
781             len,
782             V::BYTES
783         );
784 
785         // Search a possibly unaligned chunk at `start`. This covers any part
786         // of the haystack prior to where aligned loads can start.
787         if let Some(cur) = self.search_chunk(start, topos) {
788             return Some(cur);
789         }
790         // Set `cur` to the first V-aligned pointer greater than `start`.
791         let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN));
792         debug_assert!(cur > start && end.sub(V::BYTES) >= start);
793         if len >= Self::LOOP_SIZE {
794             while cur <= end.sub(Self::LOOP_SIZE) {
795                 debug_assert_eq!(0, cur.as_usize() % V::BYTES);
796 
797                 let a = V::load_aligned(cur);
798                 let b = V::load_aligned(cur.add(V::BYTES));
799                 let eqa1 = self.v1.cmpeq(a);
800                 let eqb1 = self.v1.cmpeq(b);
801                 let eqa2 = self.v2.cmpeq(a);
802                 let eqb2 = self.v2.cmpeq(b);
803                 let eqa3 = self.v3.cmpeq(a);
804                 let eqb3 = self.v3.cmpeq(b);
805                 let or1 = eqa1.or(eqb1);
806                 let or2 = eqa2.or(eqb2);
807                 let or3 = eqa3.or(eqb3);
808                 let or4 = or1.or(or2);
809                 let or5 = or3.or(or4);
810                 if or5.movemask_will_have_non_zero() {
811                     let mask = eqa1
812                         .movemask()
813                         .or(eqa2.movemask())
814                         .or(eqa3.movemask());
815                     if mask.has_non_zero() {
816                         return Some(cur.add(topos(mask)));
817                     }
818 
819                     let mask = eqb1
820                         .movemask()
821                         .or(eqb2.movemask())
822                         .or(eqb3.movemask());
823                     debug_assert!(mask.has_non_zero());
824                     return Some(cur.add(V::BYTES).add(topos(mask)));
825                 }
826                 cur = cur.add(Self::LOOP_SIZE);
827             }
828         }
829         // Handle any leftovers after the aligned loop above. We use unaligned
830         // loads here, but I believe we are guaranteed that they are aligned
831         // since `cur` is aligned.
832         while cur <= end.sub(V::BYTES) {
833             debug_assert!(end.distance(cur) >= V::BYTES);
834             if let Some(cur) = self.search_chunk(cur, topos) {
835                 return Some(cur);
836             }
837             cur = cur.add(V::BYTES);
838         }
839         // Finally handle any remaining bytes less than the size of V. In this
840         // case, our pointer may indeed be unaligned and the load may overlap
841         // with the previous one. But that's okay since we know the previous
842         // load didn't lead to a match (otherwise we wouldn't be here).
843         if cur < end {
844             debug_assert!(end.distance(cur) < V::BYTES);
845             cur = cur.sub(V::BYTES - end.distance(cur));
846             debug_assert_eq!(end.distance(cur), V::BYTES);
847             return self.search_chunk(cur, topos);
848         }
849         None
850     }
851 
852     /// Return a pointer to the last occurrence of one of the needles in the
853     /// haystack. If no such occurrence exists, then `None` is returned.
854     ///
855     /// When a match is found, the pointer returned is guaranteed to be
856     /// `>= start` and `< end`.
857     ///
858     /// # Safety
859     ///
860     /// * It must be the case that `start < end` and that the distance between
861     /// them is at least equal to `V::BYTES`. That is, it must always be valid
862     /// to do at least an unaligned load of `V` at `start`.
863     /// * Both `start` and `end` must be valid for reads.
864     /// * Both `start` and `end` must point to an initialized value.
865     /// * Both `start` and `end` must point to the same allocated object and
866     /// must either be in bounds or at most one byte past the end of the
867     /// allocated object.
868     /// * Both `start` and `end` must be _derived from_ a pointer to the same
869     /// object.
870     /// * The distance between `start` and `end` must not overflow `isize`.
871     /// * The distance being in bounds must not rely on "wrapping around" the
872     /// address space.
873     #[inline(always)]
874     pub(crate) unsafe fn rfind_raw(
875         &self,
876         start: *const u8,
877         end: *const u8,
878     ) -> Option<*const u8> {
879         // If we want to support vectors bigger than 256 bits, we probably
880         // need to move up to using a u64 for the masks used below. Currently
881         // they are 32 bits, which means we're SOL for vectors that need masks
882         // bigger than 32 bits. Overall unclear until there's a use case.
883         debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");
884 
885         let topos = V::Mask::last_offset;
886         let len = end.distance(start);
887         debug_assert!(
888             len >= V::BYTES,
889             "haystack has length {}, but must be at least {}",
890             len,
891             V::BYTES
892         );
893 
894         if let Some(cur) = self.search_chunk(end.sub(V::BYTES), topos) {
895             return Some(cur);
896         }
897         let mut cur = end.sub(end.as_usize() & V::ALIGN);
898         debug_assert!(start <= cur && cur <= end);
899         if len >= Self::LOOP_SIZE {
900             while cur >= start.add(Self::LOOP_SIZE) {
901                 debug_assert_eq!(0, cur.as_usize() % V::BYTES);
902 
903                 cur = cur.sub(Self::LOOP_SIZE);
904                 let a = V::load_aligned(cur);
905                 let b = V::load_aligned(cur.add(V::BYTES));
906                 let eqa1 = self.v1.cmpeq(a);
907                 let eqb1 = self.v1.cmpeq(b);
908                 let eqa2 = self.v2.cmpeq(a);
909                 let eqb2 = self.v2.cmpeq(b);
910                 let eqa3 = self.v3.cmpeq(a);
911                 let eqb3 = self.v3.cmpeq(b);
912                 let or1 = eqa1.or(eqb1);
913                 let or2 = eqa2.or(eqb2);
914                 let or3 = eqa3.or(eqb3);
915                 let or4 = or1.or(or2);
916                 let or5 = or3.or(or4);
917                 if or5.movemask_will_have_non_zero() {
918                     let mask = eqb1
919                         .movemask()
920                         .or(eqb2.movemask())
921                         .or(eqb3.movemask());
922                     if mask.has_non_zero() {
923                         return Some(cur.add(V::BYTES).add(topos(mask)));
924                     }
925 
926                     let mask = eqa1
927                         .movemask()
928                         .or(eqa2.movemask())
929                         .or(eqa3.movemask());
930                     debug_assert!(mask.has_non_zero());
931                     return Some(cur.add(topos(mask)));
932                 }
933             }
934         }
935         while cur >= start.add(V::BYTES) {
936             debug_assert!(cur.distance(start) >= V::BYTES);
937             cur = cur.sub(V::BYTES);
938             if let Some(cur) = self.search_chunk(cur, topos) {
939                 return Some(cur);
940             }
941         }
942         if cur > start {
943             debug_assert!(cur.distance(start) < V::BYTES);
944             return self.search_chunk(start, topos);
945         }
946         None
947     }
948 
949     /// Search `V::BYTES` starting at `cur` via an unaligned load.
950     ///
951     /// `mask_to_offset` should be a function that converts a `movemask` to
952     /// an offset such that `cur.add(offset)` corresponds to a pointer to the
953     /// match location if one is found. Generally it is expected to use either
954     /// `V::Mask::first_offset` or `V::Mask::last_offset`, depending on whether
955     /// one is implementing a forward or reverse search, respectively.
956     ///
957     /// # Safety
958     ///
959     /// `cur` must be a valid pointer and it must be valid to do an unaligned
960     /// load of size `V::BYTES` at `cur`.
961     #[inline(always)]
962     unsafe fn search_chunk(
963         &self,
964         cur: *const u8,
965         mask_to_offset: impl Fn(V::Mask) -> usize,
966     ) -> Option<*const u8> {
967         let chunk = V::load_unaligned(cur);
968         let eq1 = self.v1.cmpeq(chunk);
969         let eq2 = self.v2.cmpeq(chunk);
970         let eq3 = self.v3.cmpeq(chunk);
971         let mask = eq1.or(eq2).or(eq3).movemask();
972         if mask.has_non_zero() {
973             let mask1 = eq1.movemask();
974             let mask2 = eq2.movemask();
975             let mask3 = eq3.movemask();
976             Some(cur.add(mask_to_offset(mask1.or(mask2).or(mask3))))
977         } else {
978             None
979         }
980     }
981 }
982 
983 /// An iterator over all occurrences of a set of bytes in a haystack.
984 ///
985 /// This iterator implements the routines necessary to provide a
986 /// `DoubleEndedIterator` impl, which means it can also be used to find
987 /// occurrences in reverse order.
988 ///
989 /// The lifetime parameters are as follows:
990 ///
991 /// * `'h` refers to the lifetime of the haystack being searched.
992 ///
993 /// This type is intended to be used to implement all iterators for the
994 /// `memchr` family of functions. It handles a tiny bit of marginally tricky
995 /// raw pointer math, but otherwise expects the caller to provide `find_raw`
996 /// and `rfind_raw` routines for each call of `next` and `next_back`,
997 /// respectively.
998 #[derive(Clone, Debug)]
999 pub(crate) struct Iter<'h> {
1000     /// The original starting point into the haystack. We use this to convert
1001     /// pointers to offsets.
1002     original_start: *const u8,
1003     /// The current starting point into the haystack. That is, where the next
1004     /// search will begin.
1005     start: *const u8,
1006     /// The current ending point into the haystack. That is, where the next
1007     /// reverse search will begin.
1008     end: *const u8,
1009     /// A marker for tracking the lifetime of the original_start/start/end
1010     /// pointers above, which all point into the haystack.
1011     haystack: core::marker::PhantomData<&'h [u8]>,
1012 }
1013 
1014 // SAFETY: Iter contains no shared references to anything that performs any
1015 // interior mutations. Also, the lifetime guarantees that Iter will not outlive
1016 // the haystack.
1017 unsafe impl<'h> Send for Iter<'h> {}
1018 
1019 // SAFETY: Iter performs no interior mutations, therefore no explicit
1020 // synchronization is necessary. Also, the lifetime guarantees that Iter will
1021 // not outlive the haystack.
1022 unsafe impl<'h> Sync for Iter<'h> {}
1023 
1024 impl<'h> Iter<'h> {
1025     /// Create a new generic memchr iterator.
1026     #[inline(always)]
1027     pub(crate) fn new(haystack: &'h [u8]) -> Iter<'h> {
1028         Iter {
1029             original_start: haystack.as_ptr(),
1030             start: haystack.as_ptr(),
1031             end: haystack.as_ptr().wrapping_add(haystack.len()),
1032             haystack: core::marker::PhantomData,
1033         }
1034     }
1035 
1036     /// Returns the next occurrence in the forward direction.
1037     ///
1038     /// # Safety
1039     ///
1040     /// Callers must ensure that if a pointer is returned from the closure
1041     /// provided, then it must be greater than or equal to the start pointer
1042     /// and less than the end pointer.
1043     #[inline(always)]
1044     pub(crate) unsafe fn next(
1045         &mut self,
1046         mut find_raw: impl FnMut(*const u8, *const u8) -> Option<*const u8>,
1047     ) -> Option<usize> {
1048         // SAFETY: Pointers are derived directly from the same &[u8] haystack.
1049         // We only ever modify start/end corresponding to a matching offset
1050         // found between start and end. Thus all changes to start/end maintain
1051         // our safety requirements.
1052         //
1053         // The only other assumption we rely on is that the pointer returned
1054         // by `find_raw` satisfies `self.start <= found < self.end`, and that
1055         // safety contract is forwarded to the caller.
1056         let found = find_raw(self.start, self.end)?;
1057         let result = found.distance(self.original_start);
1058         self.start = found.add(1);
1059         Some(result)
1060     }
1061 
1062     /// Returns the number of remaining elements in this iterator.
1063     #[inline(always)]
1064     pub(crate) fn count(
1065         self,
1066         mut count_raw: impl FnMut(*const u8, *const u8) -> usize,
1067     ) -> usize {
1068         // SAFETY: Pointers are derived directly from the same &[u8] haystack.
1069         // We only ever modify start/end corresponding to a matching offset
1070         // found between start and end. Thus all changes to start/end maintain
1071         // our safety requirements.
1072         count_raw(self.start, self.end)
1073     }
1074 
1075     /// Returns the next occurrence in reverse.
1076     ///
1077     /// # Safety
1078     ///
1079     /// Callers must ensure that if a pointer is returned from the closure
1080     /// provided, then it must be greater than or equal to the start pointer
1081     /// and less than the end pointer.
1082     #[inline(always)]
1083     pub(crate) unsafe fn next_back(
1084         &mut self,
1085         mut rfind_raw: impl FnMut(*const u8, *const u8) -> Option<*const u8>,
1086     ) -> Option<usize> {
1087         // SAFETY: Pointers are derived directly from the same &[u8] haystack.
1088         // We only ever modify start/end corresponding to a matching offset
1089         // found between start and end. Thus all changes to start/end maintain
1090         // our safety requirements.
1091         //
1092         // The only other assumption we rely on is that the pointer returned
1093         // by `rfind_raw` satisfies `self.start <= found < self.end`, and that
1094         // safety contract is forwarded to the caller.
1095         let found = rfind_raw(self.start, self.end)?;
1096         let result = found.distance(self.original_start);
1097         self.end = found;
1098         Some(result)
1099     }
1100 
1101     /// Provides an implementation of `Iterator::size_hint`.
1102     #[inline(always)]
1103     pub(crate) fn size_hint(&self) -> (usize, Option<usize>) {
1104         (0, Some(self.end.as_usize().saturating_sub(self.start.as_usize())))
1105     }
1106 }
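
// For illustration only: the iterator types in the arch-specific modules are
// expected to be thin wrappers around `Iter` that plug their searcher's
// `find_raw`/`rfind_raw` in as closures. A hedged sketch, where `SomeVector`
// stands in for a real `Vector` implementation and the short-haystack
// fallback that real callers need is omitted:
//
//     struct OneIter<'a, 'h> {
//         searcher: &'a One<SomeVector>,
//         it: Iter<'h>,
//     }
//
//     impl<'a, 'h> Iterator for OneIter<'a, 'h> {
//         type Item = usize;
//
//         fn next(&mut self) -> Option<usize> {
//             // SAFETY: `find_raw` only returns pointers that are >= start
//             // and < end, which is what `Iter::next` requires.
//             unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
//         }
//     }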
1107 
1108 /// Search a slice using a function that operates on raw pointers.
1109 ///
1110 /// Given a function to search a contiguous sequence of memory for the location
1111 /// of a non-empty set of bytes, this will execute that search on a slice of
1112 /// bytes. The pointer returned by the given function will be converted to an
1113 /// offset relative to the starting point of the given slice. That is, if a
1114 /// match is found, the offset returned by this routine is guaranteed to be a
1115 /// valid index into `haystack`.
1116 ///
1117 /// Callers may use this for a forward or reverse search.
1118 ///
1119 /// # Safety
1120 ///
1121 /// Callers must ensure that if a pointer is returned by `find_raw`, then the
1122 /// pointer must be greater than or equal to the starting pointer and less than
1123 /// the end pointer.
1124 #[inline(always)]
1125 pub(crate) unsafe fn search_slice_with_raw(
1126     haystack: &[u8],
1127     mut find_raw: impl FnMut(*const u8, *const u8) -> Option<*const u8>,
1128 ) -> Option<usize> {
1129     // SAFETY: We rely on `find_raw` to return a correct and valid pointer, but
1130     // otherwise, `start` and `end` are valid due to the guarantees provided by
1131     // a &[u8].
1132     let start = haystack.as_ptr();
1133     let end = start.add(haystack.len());
1134     let found = find_raw(start, end)?;
1135     Some(found.distance(start))
1136 }
1137 
1138 /// Performs a forward byte-at-a-time loop until either `ptr >= end` or
1139 /// until `confirm(*ptr)` returns `true`. If the former occurs, then `None` is
1140 /// returned. If the latter occurs, then the pointer at which `confirm` returns
1141 /// `true` is returned.
1142 ///
1143 /// # Safety
1144 ///
1145 /// Callers must provide pointers that are valid for reads and that satisfy
1146 /// `start <= end`.
1147 #[inline(always)]
1148 pub(crate) unsafe fn fwd_byte_by_byte<F: Fn(u8) -> bool>(
1149     start: *const u8,
1150     end: *const u8,
1151     confirm: F,
1152 ) -> Option<*const u8> {
1153     debug_assert!(start <= end);
1154     let mut ptr = start;
1155     while ptr < end {
1156         if confirm(*ptr) {
1157             return Some(ptr);
1158         }
1159         ptr = ptr.offset(1);
1160     }
1161     None
1162 }
1163 
1164 /// Performs a reverse byte-at-a-time loop until either `ptr == start` or
1165 /// until `confirm(*ptr)` returns `true`. If the former occurs, then `None` is
1166 /// returned. If the latter occurs, then the pointer at which `confirm` returns
1167 /// `true` is returned.
1168 ///
1169 /// # Safety
1170 ///
1171 /// Callers must provide pointers that are valid for reads and that satisfy
1172 /// `start <= end`.
1173 #[inline(always)]
1174 pub(crate) unsafe fn rev_byte_by_byte<F: Fn(u8) -> bool>(
1175     start: *const u8,
1176     end: *const u8,
1177     confirm: F,
1178 ) -> Option<*const u8> {
1179     debug_assert!(start <= end);
1180 
1181     let mut ptr = end;
1182     while ptr > start {
1183         ptr = ptr.offset(-1);
1184         if confirm(*ptr) {
1185             return Some(ptr);
1186         }
1187     }
1188     None
1189 }
1190 
1191 /// Performs a forward byte-at-a-time loop until `ptr >= end` and returns
1192 /// the number of times `confirm(*ptr)` returns `true`.
1193 ///
1194 /// # Safety
1195 ///
1196 /// Callers must provide pointers that are valid for reads and that satisfy
1197 /// `start <= end`.
1198 #[inline(always)]
1199 pub(crate) unsafe fn count_byte_by_byte<F: Fn(u8) -> bool>(
1200     start: *const u8,
1201     end: *const u8,
1202     confirm: F,
1203 ) -> usize {
1204     debug_assert!(start <= end);
1205     let mut ptr = start;
1206     let mut count = 0;
1207     while ptr < end {
1208         if confirm(*ptr) {
1209             count += 1;
1210         }
1211         ptr = ptr.offset(1);
1212     }
1213     count
1214 }
1215