/*!
Generic crate-internal routines for the `memchr` family of functions.
*/

// What follows is a vector algorithm generic over the specific vector
// type to detect the position of one, two or three needles in a haystack.
// From what I know, this is a "classic" algorithm, although I don't
// believe it has been published in any peer reviewed journal. I believe
// it can be found in places like glibc and Go's standard library. It
// appears to be well known and is elaborated on in more detail here:
// https://gms.tf/stdfind-and-memchr-optimizations.html
//
// While the routine below is fairly long and perhaps intimidating, the basic
// idea is actually very simple and can be expressed straightforwardly in
// pseudo code. The pseudo code below is written for 128 bit vectors, but the
// actual code below works for anything that implements the Vector trait.
//
//     needle = (n1 << 15) | (n1 << 14) | ... | (n1 << 1) | n1
//     // Note: shift amount is in bytes
//
//     while i <= haystack.len() - 16:
//       // A 16 byte vector. Each byte in chunk corresponds to a byte in
//       // the haystack.
//       chunk = haystack[i:i+16]
//       // Compare bytes in needle with bytes in chunk. The result is a 16
//       // byte chunk where each byte is 0xFF if the corresponding bytes
//       // in needle and chunk were equal, or 0x00 otherwise.
//       eqs = cmpeq(needle, chunk)
//       // Return a 32 bit integer where the most significant 16 bits
//       // are always 0 and the lower 16 bits correspond to whether the
//       // most significant bit in the corresponding byte in `eqs` is set.
//       // In other words, `mask as u16` has bit i set if and only if
//       // needle[i] == chunk[i].
//       mask = movemask(eqs)
//
//       // Mask is 0 if there is no match, and non-zero otherwise.
//       if mask != 0:
//         // trailing_zeros tells us the position of the least significant
//         // bit that is set.
//         return i + trailing_zeros(mask)
//       i += 16
//
//     // haystack length may not be a multiple of 16, so search the rest.
//     while i < haystack.len():
//       if haystack[i] == n1:
//         return i
//       i += 1
//
//     // No match found.
//     return NULL
//
// In fact, we could loosely translate the above code to Rust line-for-line
// and it would be a pretty fast algorithm. But we pull out all the stops
// to go as fast as possible:
//
// 1. We use aligned loads. That is, we do some finagling to make sure our
//    primary loop not only proceeds in increments of 16 bytes, but that
//    the address of haystack's pointer that we dereference is aligned to
//    16 bytes. 16 is a magic number here because it is the size of an SSE2
//    128-bit vector. (For the AVX2 algorithm, 32 is the magic number.)
//    Therefore, to get aligned loads, our pointer's address must be evenly
//    divisible by 16.
// 2. Our primary loop proceeds 64 bytes at a time instead of 16. It's
//    kind of like loop unrolling, but we combine the equality comparisons
//    using a vector OR such that we only need to extract a single mask to
//    determine whether a match exists or not. If so, then we do some
//    book-keeping to determine the precise location but otherwise mush on.
// 3. We use our "chunk" comparison routine in as many places as possible,
//    even if it means using unaligned loads. In particular, if haystack
//    starts with an unaligned address, then we do an unaligned load to
//    search the first 16 bytes. We then start our primary loop at the
//    smallest subsequent aligned address, which will actually overlap with
//    previously searched bytes. But we're OK with that. We do a similar
//    dance at the end of our primary loop. Finally, to avoid a
//    byte-at-a-time loop at the end, we do a final 16 byte unaligned load
//    that may overlap with a previous load. This is OK because it converts
//    a loop into a small number of very fast vector instructions. The overlap
//    is OK because we know the place where the overlap occurs does not
//    contain a match.
//
// And that's pretty much all there is to it. Note that since the below is
// generic and since it's meant to be inlined into routines with a
// `#[target_feature(enable = "...")]` annotation, we must mark all routines as
// both unsafe and `#[inline(always)]`.
//
// The fact that the code below is generic does somewhat inhibit us. For
// example, I've noticed that introducing an uninlineable `#[cold]` function to
// handle the match case in the loop generates tighter assembly, but there is
// no way to do this in the generic code below because the generic code doesn't
// know what `target_feature` annotation to apply to the uninlineable function.
// We could make such functions part of the `Vector` trait, but we instead live
// with the slightly sub-optimal codegen for now since it doesn't seem to have
// a noticeable perf difference.
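//
// To make the pseudo code above a bit more concrete, here is a hedged sketch
// of the single-chunk step specialized to SSE2 on x86_64, written directly
// against the intrinsics instead of the `Vector` trait. It is purely
// illustrative: the function name is made up and the real code below goes
// through `Vector`, `MoveMask` and the `Pointer` extension trait instead.
//
//     use core::arch::x86_64::*;
//
//     /// Offset of the first occurrence of `needle` in the 16 bytes starting
//     /// at `chunk`, if any.
//     ///
//     /// # Safety
//     ///
//     /// There must be at least 16 readable bytes starting at `chunk`. (SSE2
//     /// itself is always available on x86_64.)
//     unsafe fn find_in_chunk(needle: u8, chunk: *const u8) -> Option<usize> {
//         let vn1 = _mm_set1_epi8(needle as i8); // splat needle to all lanes
//         let data = _mm_loadu_si128(chunk.cast()); // unaligned 16 byte load
//         let eqs = _mm_cmpeq_epi8(vn1, data); // 0xFF where bytes are equal
//         let mask = _mm_movemask_epi8(eqs) as u32; // one bit per byte
//         if mask != 0 {
//             Some(mask.trailing_zeros() as usize)
//         } else {
//             None
//         }
//     }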

use crate::{
    ext::Pointer,
    vector::{MoveMask, Vector},
};

/// Finds all occurrences of a single byte in a haystack.
#[derive(Clone, Copy, Debug)]
pub(crate) struct One<V> {
    s1: u8,
    v1: V,
}

impl<V: Vector> One<V> {
    /// The number of bytes we examine per each iteration of our search loop.
    const LOOP_SIZE: usize = 4 * V::BYTES;

    /// Create a new searcher that finds occurrences of the byte given.
    #[inline(always)]
    pub(crate) unsafe fn new(needle: u8) -> One<V> {
        One { s1: needle, v1: V::splat(needle) }
    }

    /// Returns the needle given to `One::new`.
    #[inline(always)]
    pub(crate) fn needle1(&self) -> u8 {
        self.s1
    }

    /// Return a pointer to the first occurrence of the needle in the given
    /// haystack. If no such occurrence exists, then `None` is returned.
    ///
    /// When a match is found, the pointer returned is guaranteed to be
    /// `>= start` and `< end`.
    ///
    /// # Safety
    ///
    /// * It must be the case that `start < end` and that the distance between
    ///   them is at least equal to `V::BYTES`. That is, it must always be
    ///   valid to do at least an unaligned load of `V` at `start`.
    /// * Both `start` and `end` must be valid for reads.
    /// * Both `start` and `end` must point to an initialized value.
    /// * Both `start` and `end` must point to the same allocated object and
    ///   must either be in bounds or at most one byte past the end of the
    ///   allocated object.
    /// * Both `start` and `end` must be _derived from_ a pointer to the same
    ///   object.
    /// * The distance between `start` and `end` must not overflow `isize`.
    /// * The distance being in bounds must not rely on "wrapping around" the
    ///   address space.
    #[inline(always)]
    pub(crate) unsafe fn find_raw(
        &self,
        start: *const u8,
        end: *const u8,
    ) -> Option<*const u8> {
        // If we want to support vectors bigger than 256 bits, we probably
        // need to move up to using a u64 for the masks used below. Currently
        // they are 32 bits, which means we're SOL for vectors that need masks
        // bigger than 32 bits. Overall unclear until there's a use case.
        debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");

        let topos = V::Mask::first_offset;
        let len = end.distance(start);
        debug_assert!(
            len >= V::BYTES,
            "haystack has length {}, but must be at least {}",
            len,
            V::BYTES
        );

        // Search a possibly unaligned chunk at `start`. This covers any part
        // of the haystack prior to where aligned loads can start.
        if let Some(cur) = self.search_chunk(start, topos) {
            return Some(cur);
        }
        // Set `cur` to the first V-aligned pointer greater than `start`.
        let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN));
        debug_assert!(cur > start && end.sub(V::BYTES) >= start);
        if len >= Self::LOOP_SIZE {
            while cur <= end.sub(Self::LOOP_SIZE) {
                debug_assert_eq!(0, cur.as_usize() % V::BYTES);

                let a = V::load_aligned(cur);
                let b = V::load_aligned(cur.add(1 * V::BYTES));
                let c = V::load_aligned(cur.add(2 * V::BYTES));
                let d = V::load_aligned(cur.add(3 * V::BYTES));
                let eqa = self.v1.cmpeq(a);
                let eqb = self.v1.cmpeq(b);
                let eqc = self.v1.cmpeq(c);
                let eqd = self.v1.cmpeq(d);
                let or1 = eqa.or(eqb);
                let or2 = eqc.or(eqd);
                let or3 = or1.or(or2);
                if or3.movemask_will_have_non_zero() {
                    let mask = eqa.movemask();
                    if mask.has_non_zero() {
                        return Some(cur.add(topos(mask)));
                    }

                    let mask = eqb.movemask();
                    if mask.has_non_zero() {
                        return Some(cur.add(1 * V::BYTES).add(topos(mask)));
                    }

                    let mask = eqc.movemask();
                    if mask.has_non_zero() {
                        return Some(cur.add(2 * V::BYTES).add(topos(mask)));
                    }

                    let mask = eqd.movemask();
                    debug_assert!(mask.has_non_zero());
                    return Some(cur.add(3 * V::BYTES).add(topos(mask)));
                }
                cur = cur.add(Self::LOOP_SIZE);
            }
        }
        // Handle any leftovers after the aligned loop above. We use unaligned
        // loads here, but I believe we are guaranteed that they are aligned
        // since `cur` is aligned.
        while cur <= end.sub(V::BYTES) {
            debug_assert!(end.distance(cur) >= V::BYTES);
            if let Some(cur) = self.search_chunk(cur, topos) {
                return Some(cur);
            }
            cur = cur.add(V::BYTES);
        }
        // Finally handle any remaining bytes less than the size of V. In this
        // case, our pointer may indeed be unaligned and the load may overlap
        // with the previous one. But that's okay since we know the previous
        // load didn't lead to a match (otherwise we wouldn't be here).
        if cur < end {
            debug_assert!(end.distance(cur) < V::BYTES);
            cur = cur.sub(V::BYTES - end.distance(cur));
            debug_assert_eq!(end.distance(cur), V::BYTES);
            return self.search_chunk(cur, topos);
        }
        None
    }

    /// Return a pointer to the last occurrence of the needle in the given
    /// haystack. If no such occurrence exists, then `None` is returned.
    ///
    /// When a match is found, the pointer returned is guaranteed to be
    /// `>= start` and `< end`.
    ///
    /// # Safety
    ///
    /// * It must be the case that `start < end` and that the distance between
    ///   them is at least equal to `V::BYTES`. That is, it must always be
    ///   valid to do at least an unaligned load of `V` at `start`.
    /// * Both `start` and `end` must be valid for reads.
    /// * Both `start` and `end` must point to an initialized value.
    /// * Both `start` and `end` must point to the same allocated object and
    ///   must either be in bounds or at most one byte past the end of the
    ///   allocated object.
    /// * Both `start` and `end` must be _derived from_ a pointer to the same
    ///   object.
    /// * The distance between `start` and `end` must not overflow `isize`.
    /// * The distance being in bounds must not rely on "wrapping around" the
    ///   address space.
    #[inline(always)]
    pub(crate) unsafe fn rfind_raw(
        &self,
        start: *const u8,
        end: *const u8,
    ) -> Option<*const u8> {
        // If we want to support vectors bigger than 256 bits, we probably
        // need to move up to using a u64 for the masks used below. Currently
        // they are 32 bits, which means we're SOL for vectors that need masks
        // bigger than 32 bits. Overall unclear until there's a use case.
        debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");

        let topos = V::Mask::last_offset;
        let len = end.distance(start);
        debug_assert!(
            len >= V::BYTES,
            "haystack has length {}, but must be at least {}",
            len,
            V::BYTES
        );

        if let Some(cur) = self.search_chunk(end.sub(V::BYTES), topos) {
            return Some(cur);
        }
        let mut cur = end.sub(end.as_usize() & V::ALIGN);
        debug_assert!(start <= cur && cur <= end);
        if len >= Self::LOOP_SIZE {
            while cur >= start.add(Self::LOOP_SIZE) {
                debug_assert_eq!(0, cur.as_usize() % V::BYTES);

                cur = cur.sub(Self::LOOP_SIZE);
                let a = V::load_aligned(cur);
                let b = V::load_aligned(cur.add(1 * V::BYTES));
                let c = V::load_aligned(cur.add(2 * V::BYTES));
                let d = V::load_aligned(cur.add(3 * V::BYTES));
                let eqa = self.v1.cmpeq(a);
                let eqb = self.v1.cmpeq(b);
                let eqc = self.v1.cmpeq(c);
                let eqd = self.v1.cmpeq(d);
                let or1 = eqa.or(eqb);
                let or2 = eqc.or(eqd);
                let or3 = or1.or(or2);
                if or3.movemask_will_have_non_zero() {
                    let mask = eqd.movemask();
                    if mask.has_non_zero() {
                        return Some(cur.add(3 * V::BYTES).add(topos(mask)));
                    }

                    let mask = eqc.movemask();
                    if mask.has_non_zero() {
                        return Some(cur.add(2 * V::BYTES).add(topos(mask)));
                    }

                    let mask = eqb.movemask();
                    if mask.has_non_zero() {
                        return Some(cur.add(1 * V::BYTES).add(topos(mask)));
                    }

                    let mask = eqa.movemask();
                    debug_assert!(mask.has_non_zero());
                    return Some(cur.add(topos(mask)));
                }
            }
        }
        while cur >= start.add(V::BYTES) {
            debug_assert!(cur.distance(start) >= V::BYTES);
            cur = cur.sub(V::BYTES);
            if let Some(cur) = self.search_chunk(cur, topos) {
                return Some(cur);
            }
        }
        if cur > start {
            debug_assert!(cur.distance(start) < V::BYTES);
            return self.search_chunk(start, topos);
        }
        None
    }

    /// Return a count of all matching bytes in the given haystack.
    ///
    /// # Safety
    ///
    /// * It must be the case that `start < end` and that the distance between
    ///   them is at least equal to `V::BYTES`. That is, it must always be
    ///   valid to do at least an unaligned load of `V` at `start`.
    /// * Both `start` and `end` must be valid for reads.
    /// * Both `start` and `end` must point to an initialized value.
    /// * Both `start` and `end` must point to the same allocated object and
    ///   must either be in bounds or at most one byte past the end of the
    ///   allocated object.
    /// * Both `start` and `end` must be _derived from_ a pointer to the same
    ///   object.
    /// * The distance between `start` and `end` must not overflow `isize`.
    /// * The distance being in bounds must not rely on "wrapping around" the
    ///   address space.
    #[inline(always)]
    pub(crate) unsafe fn count_raw(
        &self,
        start: *const u8,
        end: *const u8,
    ) -> usize {
        debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");

        let confirm = |b| b == self.needle1();
        let len = end.distance(start);
        debug_assert!(
            len >= V::BYTES,
            "haystack has length {}, but must be at least {}",
            len,
            V::BYTES
        );

        // Set `cur` to the first V-aligned pointer greater than `start`.
        let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN));
        // Count any matching bytes before we start our aligned loop.
        let mut count = count_byte_by_byte(start, cur, confirm);
        debug_assert!(cur > start && end.sub(V::BYTES) >= start);
        if len >= Self::LOOP_SIZE {
            while cur <= end.sub(Self::LOOP_SIZE) {
                debug_assert_eq!(0, cur.as_usize() % V::BYTES);

                let a = V::load_aligned(cur);
                let b = V::load_aligned(cur.add(1 * V::BYTES));
                let c = V::load_aligned(cur.add(2 * V::BYTES));
                let d = V::load_aligned(cur.add(3 * V::BYTES));
                let eqa = self.v1.cmpeq(a);
                let eqb = self.v1.cmpeq(b);
                let eqc = self.v1.cmpeq(c);
                let eqd = self.v1.cmpeq(d);
                count += eqa.movemask().count_ones();
                count += eqb.movemask().count_ones();
                count += eqc.movemask().count_ones();
                count += eqd.movemask().count_ones();
                cur = cur.add(Self::LOOP_SIZE);
            }
        }
        // Handle any leftovers after the aligned loop above. We use unaligned
        // loads here, but I believe we are guaranteed that they are aligned
        // since `cur` is aligned.
        while cur <= end.sub(V::BYTES) {
            debug_assert!(end.distance(cur) >= V::BYTES);
            let chunk = V::load_unaligned(cur);
            count += self.v1.cmpeq(chunk).movemask().count_ones();
            cur = cur.add(V::BYTES);
        }
        // And finally count any leftovers that weren't caught above.
        count += count_byte_by_byte(cur, end, confirm);
        count
    }

    /// Search `V::BYTES` starting at `cur` via an unaligned load.
    ///
    /// `mask_to_offset` should be a function that converts a `movemask` to
    /// an offset such that `cur.add(offset)` corresponds to a pointer to the
    /// match location if one is found. Generally it is expected to be either
    /// `V::Mask::first_offset` or `V::Mask::last_offset`, depending on
    /// whether one is implementing a forward or reverse search, respectively.
    ///
    /// # Safety
    ///
    /// `cur` must be a valid pointer and it must be valid to do an unaligned
    /// load of size `V::BYTES` at `cur`.
    #[inline(always)]
    unsafe fn search_chunk(
        &self,
        cur: *const u8,
        mask_to_offset: impl Fn(V::Mask) -> usize,
    ) -> Option<*const u8> {
        let chunk = V::load_unaligned(cur);
        let mask = self.v1.cmpeq(chunk).movemask();
        if mask.has_non_zero() {
            Some(cur.add(mask_to_offset(mask)))
        } else {
            None
        }
    }
}
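
// As noted in the comment at the top of this module, the searchers here are
// meant to be instantiated with a concrete `Vector` impl and inlined into a
// routine carrying a `#[target_feature(...)]` annotation. The following is a
// hedged sketch of what such a wrapper might look like for `One` on x86-64;
// the name and the short-haystack fallback policy are assumptions for
// illustration, not this crate's actual wrappers, and it assumes (as in this
// crate's x86-64 backend) that `__m128i` implements `Vector`:
//
//     #[target_feature(enable = "sse2")]
//     unsafe fn one_find_sse2(needle: u8, haystack: &[u8]) -> Option<usize> {
//         let start = haystack.as_ptr();
//         let end = start.add(haystack.len());
//         // `find_raw` requires at least `V::BYTES` bytes, so fall back to
//         // a byte-at-a-time scan for very short haystacks.
//         let found = if haystack.len() < 16 {
//             fwd_byte_by_byte(start, end, |b| b == needle)?
//         } else {
//             let one = One::<core::arch::x86_64::__m128i>::new(needle);
//             one.find_raw(start, end)?
//         };
//         Some(found.distance(start))
//     }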

/// Finds all occurrences of two bytes in a haystack.
///
/// That is, this reports matches of one of two possible bytes. For example,
/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`,
/// `4` and `5`.
#[derive(Clone, Copy, Debug)]
pub(crate) struct Two<V> {
    s1: u8,
    s2: u8,
    v1: V,
    v2: V,
}

impl<V: Vector> Two<V> {
    /// The number of bytes we examine per each iteration of our search loop.
    const LOOP_SIZE: usize = 2 * V::BYTES;

    /// Create a new searcher that finds occurrences of the two bytes given.
    #[inline(always)]
    pub(crate) unsafe fn new(needle1: u8, needle2: u8) -> Two<V> {
        Two {
            s1: needle1,
            s2: needle2,
            v1: V::splat(needle1),
            v2: V::splat(needle2),
        }
    }

    /// Returns the first needle given to `Two::new`.
    #[inline(always)]
    pub(crate) fn needle1(&self) -> u8 {
        self.s1
    }

    /// Returns the second needle given to `Two::new`.
    #[inline(always)]
    pub(crate) fn needle2(&self) -> u8 {
        self.s2
    }

    /// Return a pointer to the first occurrence of one of the needles in the
    /// given haystack. If no such occurrence exists, then `None` is returned.
    ///
    /// When a match is found, the pointer returned is guaranteed to be
    /// `>= start` and `< end`.
    ///
    /// # Safety
    ///
    /// * It must be the case that `start < end` and that the distance between
    ///   them is at least equal to `V::BYTES`. That is, it must always be
    ///   valid to do at least an unaligned load of `V` at `start`.
    /// * Both `start` and `end` must be valid for reads.
    /// * Both `start` and `end` must point to an initialized value.
    /// * Both `start` and `end` must point to the same allocated object and
    ///   must either be in bounds or at most one byte past the end of the
    ///   allocated object.
    /// * Both `start` and `end` must be _derived from_ a pointer to the same
    ///   object.
    /// * The distance between `start` and `end` must not overflow `isize`.
    /// * The distance being in bounds must not rely on "wrapping around" the
    ///   address space.
    #[inline(always)]
    pub(crate) unsafe fn find_raw(
        &self,
        start: *const u8,
        end: *const u8,
    ) -> Option<*const u8> {
        // If we want to support vectors bigger than 256 bits, we probably
        // need to move up to using a u64 for the masks used below. Currently
        // they are 32 bits, which means we're SOL for vectors that need masks
        // bigger than 32 bits. Overall unclear until there's a use case.
        debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");

        let topos = V::Mask::first_offset;
        let len = end.distance(start);
        debug_assert!(
            len >= V::BYTES,
            "haystack has length {}, but must be at least {}",
            len,
            V::BYTES
        );

        // Search a possibly unaligned chunk at `start`. This covers any part
        // of the haystack prior to where aligned loads can start.
        if let Some(cur) = self.search_chunk(start, topos) {
            return Some(cur);
        }
        // Set `cur` to the first V-aligned pointer greater than `start`.
        let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN));
        debug_assert!(cur > start && end.sub(V::BYTES) >= start);
        if len >= Self::LOOP_SIZE {
            while cur <= end.sub(Self::LOOP_SIZE) {
                debug_assert_eq!(0, cur.as_usize() % V::BYTES);

                let a = V::load_aligned(cur);
                let b = V::load_aligned(cur.add(V::BYTES));
                let eqa1 = self.v1.cmpeq(a);
                let eqb1 = self.v1.cmpeq(b);
                let eqa2 = self.v2.cmpeq(a);
                let eqb2 = self.v2.cmpeq(b);
                let or1 = eqa1.or(eqb1);
                let or2 = eqa2.or(eqb2);
                let or3 = or1.or(or2);
                if or3.movemask_will_have_non_zero() {
                    let mask = eqa1.movemask().or(eqa2.movemask());
                    if mask.has_non_zero() {
                        return Some(cur.add(topos(mask)));
                    }

                    let mask = eqb1.movemask().or(eqb2.movemask());
                    debug_assert!(mask.has_non_zero());
                    return Some(cur.add(V::BYTES).add(topos(mask)));
                }
                cur = cur.add(Self::LOOP_SIZE);
            }
        }
        // Handle any leftovers after the aligned loop above. We use unaligned
        // loads here, but I believe we are guaranteed that they are aligned
        // since `cur` is aligned.
        while cur <= end.sub(V::BYTES) {
            debug_assert!(end.distance(cur) >= V::BYTES);
            if let Some(cur) = self.search_chunk(cur, topos) {
                return Some(cur);
            }
            cur = cur.add(V::BYTES);
        }
        // Finally handle any remaining bytes less than the size of V. In this
        // case, our pointer may indeed be unaligned and the load may overlap
        // with the previous one. But that's okay since we know the previous
        // load didn't lead to a match (otherwise we wouldn't be here).
        if cur < end {
            debug_assert!(end.distance(cur) < V::BYTES);
            cur = cur.sub(V::BYTES - end.distance(cur));
            debug_assert_eq!(end.distance(cur), V::BYTES);
            return self.search_chunk(cur, topos);
        }
        None
    }

    /// Return a pointer to the last occurrence of one of the needles in the
    /// given haystack. If no such occurrence exists, then `None` is returned.
    ///
    /// When a match is found, the pointer returned is guaranteed to be
    /// `>= start` and `< end`.
    ///
    /// # Safety
    ///
    /// * It must be the case that `start < end` and that the distance between
    ///   them is at least equal to `V::BYTES`. That is, it must always be
    ///   valid to do at least an unaligned load of `V` at `start`.
    /// * Both `start` and `end` must be valid for reads.
    /// * Both `start` and `end` must point to an initialized value.
    /// * Both `start` and `end` must point to the same allocated object and
    ///   must either be in bounds or at most one byte past the end of the
    ///   allocated object.
    /// * Both `start` and `end` must be _derived from_ a pointer to the same
    ///   object.
    /// * The distance between `start` and `end` must not overflow `isize`.
    /// * The distance being in bounds must not rely on "wrapping around" the
    ///   address space.
    #[inline(always)]
    pub(crate) unsafe fn rfind_raw(
        &self,
        start: *const u8,
        end: *const u8,
    ) -> Option<*const u8> {
        // If we want to support vectors bigger than 256 bits, we probably
        // need to move up to using a u64 for the masks used below. Currently
        // they are 32 bits, which means we're SOL for vectors that need masks
        // bigger than 32 bits. Overall unclear until there's a use case.
        debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");

        let topos = V::Mask::last_offset;
        let len = end.distance(start);
        debug_assert!(
            len >= V::BYTES,
            "haystack has length {}, but must be at least {}",
            len,
            V::BYTES
        );

        if let Some(cur) = self.search_chunk(end.sub(V::BYTES), topos) {
            return Some(cur);
        }
        let mut cur = end.sub(end.as_usize() & V::ALIGN);
        debug_assert!(start <= cur && cur <= end);
        if len >= Self::LOOP_SIZE {
            while cur >= start.add(Self::LOOP_SIZE) {
                debug_assert_eq!(0, cur.as_usize() % V::BYTES);

                cur = cur.sub(Self::LOOP_SIZE);
                let a = V::load_aligned(cur);
                let b = V::load_aligned(cur.add(V::BYTES));
                let eqa1 = self.v1.cmpeq(a);
                let eqb1 = self.v1.cmpeq(b);
                let eqa2 = self.v2.cmpeq(a);
                let eqb2 = self.v2.cmpeq(b);
                let or1 = eqa1.or(eqb1);
                let or2 = eqa2.or(eqb2);
                let or3 = or1.or(or2);
                if or3.movemask_will_have_non_zero() {
                    let mask = eqb1.movemask().or(eqb2.movemask());
                    if mask.has_non_zero() {
                        return Some(cur.add(V::BYTES).add(topos(mask)));
                    }

                    let mask = eqa1.movemask().or(eqa2.movemask());
                    debug_assert!(mask.has_non_zero());
                    return Some(cur.add(topos(mask)));
                }
            }
        }
        while cur >= start.add(V::BYTES) {
            debug_assert!(cur.distance(start) >= V::BYTES);
            cur = cur.sub(V::BYTES);
            if let Some(cur) = self.search_chunk(cur, topos) {
                return Some(cur);
            }
        }
        if cur > start {
            debug_assert!(cur.distance(start) < V::BYTES);
            return self.search_chunk(start, topos);
        }
        None
    }

    /// Search `V::BYTES` starting at `cur` via an unaligned load.
    ///
    /// `mask_to_offset` should be a function that converts a `movemask` to
    /// an offset such that `cur.add(offset)` corresponds to a pointer to the
    /// match location if one is found. Generally it is expected to be either
    /// `V::Mask::first_offset` or `V::Mask::last_offset`, depending on
    /// whether one is implementing a forward or reverse search, respectively.
    ///
    /// # Safety
    ///
    /// `cur` must be a valid pointer and it must be valid to do an unaligned
    /// load of size `V::BYTES` at `cur`.
    #[inline(always)]
    unsafe fn search_chunk(
        &self,
        cur: *const u8,
        mask_to_offset: impl Fn(V::Mask) -> usize,
    ) -> Option<*const u8> {
        let chunk = V::load_unaligned(cur);
        let eq1 = self.v1.cmpeq(chunk);
        let eq2 = self.v2.cmpeq(chunk);
        let mask = eq1.or(eq2).movemask();
        if mask.has_non_zero() {
            let mask1 = eq1.movemask();
            let mask2 = eq2.movemask();
            Some(cur.add(mask_to_offset(mask1.or(mask2))))
        } else {
            None
        }
    }
}

/// Finds all occurrences of three bytes in a haystack.
///
/// That is, this reports matches of one of three possible bytes. For example,
/// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets
/// `0`, `2`, `3`, `4` and `5`.
#[derive(Clone, Copy, Debug)]
pub(crate) struct Three<V> {
    s1: u8,
    s2: u8,
    s3: u8,
    v1: V,
    v2: V,
    v3: V,
}

impl<V: Vector> Three<V> {
    /// The number of bytes we examine per each iteration of our search loop.
    const LOOP_SIZE: usize = 2 * V::BYTES;

    /// Create a new searcher that finds occurrences of the three bytes given.
    #[inline(always)]
    pub(crate) unsafe fn new(
        needle1: u8,
        needle2: u8,
        needle3: u8,
    ) -> Three<V> {
        Three {
            s1: needle1,
            s2: needle2,
            s3: needle3,
            v1: V::splat(needle1),
            v2: V::splat(needle2),
            v3: V::splat(needle3),
        }
    }

    /// Returns the first needle given to `Three::new`.
    #[inline(always)]
    pub(crate) fn needle1(&self) -> u8 {
        self.s1
    }

    /// Returns the second needle given to `Three::new`.
    #[inline(always)]
    pub(crate) fn needle2(&self) -> u8 {
        self.s2
    }

    /// Returns the third needle given to `Three::new`.
    #[inline(always)]
    pub(crate) fn needle3(&self) -> u8 {
        self.s3
    }

    /// Return a pointer to the first occurrence of one of the needles in the
    /// given haystack. If no such occurrence exists, then `None` is returned.
    ///
    /// When a match is found, the pointer returned is guaranteed to be
    /// `>= start` and `< end`.
    ///
    /// # Safety
    ///
    /// * It must be the case that `start < end` and that the distance between
    ///   them is at least equal to `V::BYTES`. That is, it must always be
    ///   valid to do at least an unaligned load of `V` at `start`.
    /// * Both `start` and `end` must be valid for reads.
    /// * Both `start` and `end` must point to an initialized value.
    /// * Both `start` and `end` must point to the same allocated object and
    ///   must either be in bounds or at most one byte past the end of the
    ///   allocated object.
    /// * Both `start` and `end` must be _derived from_ a pointer to the same
    ///   object.
    /// * The distance between `start` and `end` must not overflow `isize`.
    /// * The distance being in bounds must not rely on "wrapping around" the
    ///   address space.
    #[inline(always)]
    pub(crate) unsafe fn find_raw(
        &self,
        start: *const u8,
        end: *const u8,
    ) -> Option<*const u8> {
        // If we want to support vectors bigger than 256 bits, we probably
        // need to move up to using a u64 for the masks used below. Currently
        // they are 32 bits, which means we're SOL for vectors that need masks
        // bigger than 32 bits. Overall unclear until there's a use case.
        debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");

        let topos = V::Mask::first_offset;
        let len = end.distance(start);
        debug_assert!(
            len >= V::BYTES,
            "haystack has length {}, but must be at least {}",
            len,
            V::BYTES
        );

        // Search a possibly unaligned chunk at `start`. This covers any part
        // of the haystack prior to where aligned loads can start.
        if let Some(cur) = self.search_chunk(start, topos) {
            return Some(cur);
        }
        // Set `cur` to the first V-aligned pointer greater than `start`.
        let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN));
        debug_assert!(cur > start && end.sub(V::BYTES) >= start);
        if len >= Self::LOOP_SIZE {
            while cur <= end.sub(Self::LOOP_SIZE) {
                debug_assert_eq!(0, cur.as_usize() % V::BYTES);

                let a = V::load_aligned(cur);
                let b = V::load_aligned(cur.add(V::BYTES));
                let eqa1 = self.v1.cmpeq(a);
                let eqb1 = self.v1.cmpeq(b);
                let eqa2 = self.v2.cmpeq(a);
                let eqb2 = self.v2.cmpeq(b);
                let eqa3 = self.v3.cmpeq(a);
                let eqb3 = self.v3.cmpeq(b);
                let or1 = eqa1.or(eqb1);
                let or2 = eqa2.or(eqb2);
                let or3 = eqa3.or(eqb3);
                let or4 = or1.or(or2);
                let or5 = or3.or(or4);
                if or5.movemask_will_have_non_zero() {
                    let mask = eqa1
                        .movemask()
                        .or(eqa2.movemask())
                        .or(eqa3.movemask());
                    if mask.has_non_zero() {
                        return Some(cur.add(topos(mask)));
                    }

                    let mask = eqb1
                        .movemask()
                        .or(eqb2.movemask())
                        .or(eqb3.movemask());
                    debug_assert!(mask.has_non_zero());
                    return Some(cur.add(V::BYTES).add(topos(mask)));
                }
                cur = cur.add(Self::LOOP_SIZE);
            }
        }
        // Handle any leftovers after the aligned loop above. We use unaligned
        // loads here, but I believe we are guaranteed that they are aligned
        // since `cur` is aligned.
        while cur <= end.sub(V::BYTES) {
            debug_assert!(end.distance(cur) >= V::BYTES);
            if let Some(cur) = self.search_chunk(cur, topos) {
                return Some(cur);
            }
            cur = cur.add(V::BYTES);
        }
        // Finally handle any remaining bytes less than the size of V. In this
        // case, our pointer may indeed be unaligned and the load may overlap
        // with the previous one. But that's okay since we know the previous
        // load didn't lead to a match (otherwise we wouldn't be here).
        if cur < end {
            debug_assert!(end.distance(cur) < V::BYTES);
            cur = cur.sub(V::BYTES - end.distance(cur));
            debug_assert_eq!(end.distance(cur), V::BYTES);
            return self.search_chunk(cur, topos);
        }
        None
    }

    /// Return a pointer to the last occurrence of one of the needles in the
    /// given haystack. If no such occurrence exists, then `None` is returned.
    ///
    /// When a match is found, the pointer returned is guaranteed to be
    /// `>= start` and `< end`.
    ///
    /// # Safety
    ///
    /// * It must be the case that `start < end` and that the distance between
    ///   them is at least equal to `V::BYTES`. That is, it must always be
    ///   valid to do at least an unaligned load of `V` at `start`.
    /// * Both `start` and `end` must be valid for reads.
    /// * Both `start` and `end` must point to an initialized value.
    /// * Both `start` and `end` must point to the same allocated object and
    ///   must either be in bounds or at most one byte past the end of the
    ///   allocated object.
    /// * Both `start` and `end` must be _derived from_ a pointer to the same
    ///   object.
    /// * The distance between `start` and `end` must not overflow `isize`.
    /// * The distance being in bounds must not rely on "wrapping around" the
    ///   address space.
    #[inline(always)]
    pub(crate) unsafe fn rfind_raw(
        &self,
        start: *const u8,
        end: *const u8,
    ) -> Option<*const u8> {
        // If we want to support vectors bigger than 256 bits, we probably
        // need to move up to using a u64 for the masks used below. Currently
        // they are 32 bits, which means we're SOL for vectors that need masks
        // bigger than 32 bits. Overall unclear until there's a use case.
        debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");

        let topos = V::Mask::last_offset;
        let len = end.distance(start);
        debug_assert!(
            len >= V::BYTES,
            "haystack has length {}, but must be at least {}",
            len,
            V::BYTES
        );

        if let Some(cur) = self.search_chunk(end.sub(V::BYTES), topos) {
            return Some(cur);
        }
        let mut cur = end.sub(end.as_usize() & V::ALIGN);
        debug_assert!(start <= cur && cur <= end);
        if len >= Self::LOOP_SIZE {
            while cur >= start.add(Self::LOOP_SIZE) {
                debug_assert_eq!(0, cur.as_usize() % V::BYTES);

                cur = cur.sub(Self::LOOP_SIZE);
                let a = V::load_aligned(cur);
                let b = V::load_aligned(cur.add(V::BYTES));
                let eqa1 = self.v1.cmpeq(a);
                let eqb1 = self.v1.cmpeq(b);
                let eqa2 = self.v2.cmpeq(a);
                let eqb2 = self.v2.cmpeq(b);
                let eqa3 = self.v3.cmpeq(a);
                let eqb3 = self.v3.cmpeq(b);
                let or1 = eqa1.or(eqb1);
                let or2 = eqa2.or(eqb2);
                let or3 = eqa3.or(eqb3);
                let or4 = or1.or(or2);
                let or5 = or3.or(or4);
                if or5.movemask_will_have_non_zero() {
                    let mask = eqb1
                        .movemask()
                        .or(eqb2.movemask())
                        .or(eqb3.movemask());
                    if mask.has_non_zero() {
                        return Some(cur.add(V::BYTES).add(topos(mask)));
                    }

                    let mask = eqa1
                        .movemask()
                        .or(eqa2.movemask())
                        .or(eqa3.movemask());
                    debug_assert!(mask.has_non_zero());
                    return Some(cur.add(topos(mask)));
                }
            }
        }
        while cur >= start.add(V::BYTES) {
            debug_assert!(cur.distance(start) >= V::BYTES);
            cur = cur.sub(V::BYTES);
            if let Some(cur) = self.search_chunk(cur, topos) {
                return Some(cur);
            }
        }
        if cur > start {
            debug_assert!(cur.distance(start) < V::BYTES);
            return self.search_chunk(start, topos);
        }
        None
    }

    /// Search `V::BYTES` starting at `cur` via an unaligned load.
    ///
    /// `mask_to_offset` should be a function that converts a `movemask` to
    /// an offset such that `cur.add(offset)` corresponds to a pointer to the
    /// match location if one is found. Generally it is expected to be either
    /// `V::Mask::first_offset` or `V::Mask::last_offset`, depending on
    /// whether one is implementing a forward or reverse search, respectively.
    ///
    /// # Safety
    ///
    /// `cur` must be a valid pointer and it must be valid to do an unaligned
    /// load of size `V::BYTES` at `cur`.
    #[inline(always)]
    unsafe fn search_chunk(
        &self,
        cur: *const u8,
        mask_to_offset: impl Fn(V::Mask) -> usize,
    ) -> Option<*const u8> {
        let chunk = V::load_unaligned(cur);
        let eq1 = self.v1.cmpeq(chunk);
        let eq2 = self.v2.cmpeq(chunk);
        let eq3 = self.v3.cmpeq(chunk);
        let mask = eq1.or(eq2).or(eq3).movemask();
        if mask.has_non_zero() {
            let mask1 = eq1.movemask();
            let mask2 = eq2.movemask();
            let mask3 = eq3.movemask();
            Some(cur.add(mask_to_offset(mask1.or(mask2).or(mask3))))
        } else {
            None
        }
    }
}

/// An iterator over all occurrences of a set of bytes in a haystack.
///
/// This iterator implements the routines necessary to provide a
/// `DoubleEndedIterator` impl, which means it can also be used to find
/// occurrences in reverse order.
///
/// The lifetime parameters are as follows:
///
/// * `'h` refers to the lifetime of the haystack being searched.
///
/// This type is intended to be used to implement all iterators for the
/// `memchr` family of functions. It handles a tiny bit of marginally tricky
/// raw pointer math, but otherwise expects the caller to provide `find_raw`
/// and `rfind_raw` routines for each call of `next` and `next_back`,
/// respectively.
#[derive(Clone, Debug)]
pub(crate) struct Iter<'h> {
    /// The original starting point into the haystack. We use this to convert
    /// pointers to offsets.
    original_start: *const u8,
    /// The current starting point into the haystack. That is, where the next
    /// search will begin.
    start: *const u8,
    /// The current ending point into the haystack. That is, where the next
    /// reverse search will begin.
    end: *const u8,
    /// A marker for tracking the lifetime of the `original_start`, `start`
    /// and `end` pointers above, which all point into the haystack.
    haystack: core::marker::PhantomData<&'h [u8]>,
}
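
// The following is a hedged sketch of how `Iter` is meant to be driven. It is
// illustrative only: `haystack` is assumed to be a `&[u8]` in scope, `V`
// stands in for a concrete `Vector` impl, and the short-haystack fallback is
// an assumption about the caller, not something `Iter` does itself. The
// closure handed to `next` must uphold `find_raw`'s contract, including its
// minimum-length requirement, hence the byte-at-a-time fallback:
//
//     let one = unsafe { One::<V>::new(b'z') };
//     let mut find = |s: *const u8, e: *const u8| unsafe {
//         // `find_raw` requires at least `V::BYTES` bytes, so fall back to a
//         // byte-at-a-time scan for short remainders.
//         if e.distance(s) < V::BYTES {
//             fwd_byte_by_byte(s, e, |b| b == one.needle1())
//         } else {
//             one.find_raw(s, e)
//         }
//     };
//     let mut it = Iter::new(haystack);
//     // SAFETY: `find` only ever returns pointers in `[s, e)`.
//     while let Some(offset) = unsafe { it.next(&mut find) } {
//         // `offset` is an index into `haystack`.
//     }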

// SAFETY: Iter contains no shared references to anything that performs any
// interior mutations. Also, the lifetime guarantees that Iter will not outlive
// the haystack.
unsafe impl<'h> Send for Iter<'h> {}

// SAFETY: Iter performs no interior mutations, therefore no explicit
// synchronization is necessary. Also, the lifetime guarantees that Iter will
// not outlive the haystack.
unsafe impl<'h> Sync for Iter<'h> {}

impl<'h> Iter<'h> {
    /// Create a new generic memchr iterator.
    #[inline(always)]
    pub(crate) fn new(haystack: &'h [u8]) -> Iter<'h> {
        Iter {
            original_start: haystack.as_ptr(),
            start: haystack.as_ptr(),
            end: haystack.as_ptr().wrapping_add(haystack.len()),
            haystack: core::marker::PhantomData,
        }
    }

    /// Returns the next occurrence in the forward direction.
    ///
    /// # Safety
    ///
    /// Callers must ensure that if a pointer is returned from the closure
    /// provided, then it must be greater than or equal to the start pointer
    /// and less than the end pointer.
    #[inline(always)]
    pub(crate) unsafe fn next(
        &mut self,
        mut find_raw: impl FnMut(*const u8, *const u8) -> Option<*const u8>,
    ) -> Option<usize> {
        // SAFETY: Pointers are derived directly from the same &[u8] haystack.
        // We only ever modify start/end corresponding to a matching offset
        // found between start and end. Thus all changes to start/end maintain
        // our safety requirements.
        //
        // The only other assumption we rely on is that the pointer returned
        // by `find_raw` satisfies `self.start <= found < self.end`, and that
        // safety contract is forwarded to the caller.
        let found = find_raw(self.start, self.end)?;
        let result = found.distance(self.original_start);
        self.start = found.add(1);
        Some(result)
    }

    /// Returns the number of remaining elements in this iterator.
    #[inline(always)]
    pub(crate) fn count(
        self,
        mut count_raw: impl FnMut(*const u8, *const u8) -> usize,
    ) -> usize {
        // SAFETY: Pointers are derived directly from the same &[u8] haystack.
        // We only ever modify start/end corresponding to a matching offset
        // found between start and end. Thus all changes to start/end maintain
        // our safety requirements.
        count_raw(self.start, self.end)
    }

    /// Returns the next occurrence in reverse.
    ///
    /// # Safety
    ///
    /// Callers must ensure that if a pointer is returned from the closure
    /// provided, then it must be greater than or equal to the start pointer
    /// and less than the end pointer.
    #[inline(always)]
    pub(crate) unsafe fn next_back(
        &mut self,
        mut rfind_raw: impl FnMut(*const u8, *const u8) -> Option<*const u8>,
    ) -> Option<usize> {
        // SAFETY: Pointers are derived directly from the same &[u8] haystack.
        // We only ever modify start/end corresponding to a matching offset
        // found between start and end. Thus all changes to start/end maintain
        // our safety requirements.
        //
        // The only other assumption we rely on is that the pointer returned
        // by `rfind_raw` satisfies `self.start <= found < self.end`, and that
        // safety contract is forwarded to the caller.
        let found = rfind_raw(self.start, self.end)?;
        let result = found.distance(self.original_start);
        self.end = found;
        Some(result)
    }

    /// Provides an implementation of `Iterator::size_hint`.
    #[inline(always)]
    pub(crate) fn size_hint(&self) -> (usize, Option<usize>) {
        (0, Some(self.end.as_usize().saturating_sub(self.start.as_usize())))
    }
}

/// Search a slice using a function that operates on raw pointers.
///
/// Given a function to search a contiguous sequence of memory for the location
/// of a non-empty set of bytes, this will execute that search on a slice of
/// bytes. The pointer returned by the given function will be converted to an
/// offset relative to the starting point of the given slice. That is, if a
/// match is found, the offset returned by this routine is guaranteed to be a
/// valid index into `haystack`.
///
/// Callers may use this for a forward or reverse search.
///
/// # Safety
///
/// Callers must ensure that if a pointer is returned by `find_raw`, then the
/// pointer must be greater than or equal to the starting pointer and less
/// than the end pointer.
#[inline(always)]
pub(crate) unsafe fn search_slice_with_raw(
    haystack: &[u8],
    mut find_raw: impl FnMut(*const u8, *const u8) -> Option<*const u8>,
) -> Option<usize> {
    // SAFETY: We rely on `find_raw` to return a correct and valid pointer, but
    // otherwise, `start` and `end` are valid due to the guarantees provided by
    // a &[u8].
    let start = haystack.as_ptr();
    let end = start.add(haystack.len());
    let found = find_raw(start, end)?;
    Some(found.distance(start))
}
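
// A hedged usage sketch of `search_slice_with_raw`. Illustrative only: `V`
// stands in for a concrete `Vector` impl, and the fallback policy in the
// closure is an assumption about the caller, not something this function
// enforces itself:
//
//     let haystack = b"xxxyxx";
//     let needle = b'y';
//     let one = unsafe { One::<V>::new(needle) };
//     let find = |s: *const u8, e: *const u8| unsafe {
//         // Uphold `find_raw`'s minimum-length requirement.
//         if e.distance(s) < V::BYTES {
//             fwd_byte_by_byte(s, e, |b| b == needle)
//         } else {
//             one.find_raw(s, e)
//         }
//     };
//     // SAFETY: `find` only ever returns pointers in `[s, e)`.
//     let offset = unsafe { search_slice_with_raw(haystack, find) };
//     assert_eq!(offset, Some(3));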

/// Performs a forward byte-at-a-time loop until either `ptr >= end` or until
/// `confirm(*ptr)` returns `true`. If the former occurs, then `None` is
/// returned. If the latter occurs, then the pointer at which `confirm` returns
/// `true` is returned.
///
/// # Safety
///
/// Callers must provide valid pointers and they must satisfy `start <= end`,
/// with every byte in `[start, end)` valid for reads.
#[inline(always)]
pub(crate) unsafe fn fwd_byte_by_byte<F: Fn(u8) -> bool>(
    start: *const u8,
    end: *const u8,
    confirm: F,
) -> Option<*const u8> {
    debug_assert!(start <= end);
    let mut ptr = start;
    while ptr < end {
        if confirm(*ptr) {
            return Some(ptr);
        }
        ptr = ptr.offset(1);
    }
    None
}

/// Performs a reverse byte-at-a-time loop until either `ptr < start` or until
/// `confirm(*ptr)` returns `true`. If the former occurs, then `None` is
/// returned. If the latter occurs, then the pointer at which `confirm` returns
/// `true` is returned.
///
/// # Safety
///
/// Callers must provide valid pointers and they must satisfy `start <= end`,
/// with every byte in `[start, end)` valid for reads.
#[inline(always)]
pub(crate) unsafe fn rev_byte_by_byte<F: Fn(u8) -> bool>(
    start: *const u8,
    end: *const u8,
    confirm: F,
) -> Option<*const u8> {
    debug_assert!(start <= end);

    let mut ptr = end;
    while ptr > start {
        ptr = ptr.offset(-1);
        if confirm(*ptr) {
            return Some(ptr);
        }
    }
    None
}

/// Performs a forward byte-at-a-time loop until `ptr >= end` and returns
/// the number of times `confirm(*ptr)` returns `true`.
///
/// # Safety
///
/// Callers must provide valid pointers and they must satisfy `start <= end`,
/// with every byte in `[start, end)` valid for reads.
#[inline(always)]
pub(crate) unsafe fn count_byte_by_byte<F: Fn(u8) -> bool>(
    start: *const u8,
    end: *const u8,
    confirm: F,
) -> usize {
    debug_assert!(start <= end);
    let mut ptr = start;
    let mut count = 0;
    while ptr < end {
        if confirm(*ptr) {
            count += 1;
        }
        ptr = ptr.offset(1);
    }
    count
}