1 /*! 2 This module defines 256-bit vector implementations of `memchr` and friends. 3 4 The main types in this module are [`One`], [`Two`] and [`Three`]. They are for 5 searching for one, two or three distinct bytes, respectively, in a haystack. 6 Each type also has corresponding double ended iterators. These searchers are 7 typically much faster than scalar routines accomplishing the same task. 8 9 The `One` searcher also provides a [`One::count`] routine for efficiently 10 counting the number of times a single byte occurs in a haystack. This is 11 useful, for example, for counting the number of lines in a haystack. This 12 routine exists because it is usually faster, especially with a high match 13 count, then using [`One::find`] repeatedly. ([`OneIter`] specializes its 14 `Iterator::count` implementation to use this routine.) 15 16 Only one, two and three bytes are supported because three bytes is about 17 the point where one sees diminishing returns. Beyond this point and it's 18 probably (but not necessarily) better to just use a simple `[bool; 256]` array 19 or similar. However, it depends mightily on the specific work-load and the 20 expected match frequency. 21 */ 22 23 use core::arch::x86_64::{__m128i, __m256i}; 24 25 use crate::{arch::generic::memchr as generic, ext::Pointer, vector::Vector}; 26 27 /// Finds all occurrences of a single byte in a haystack. 28 #[derive(Clone, Copy, Debug)] 29 pub struct One { 30 /// Used for haystacks less than 32 bytes. 31 sse2: generic::One<__m128i>, 32 /// Used for haystacks bigger than 32 bytes. 33 avx2: generic::One<__m256i>, 34 } 35 36 impl One { 37 /// Create a new searcher that finds occurrences of the needle byte given. 38 /// 39 /// This particular searcher is specialized to use AVX2 vector instructions 40 /// that typically make it quite fast. (SSE2 is used for haystacks that 41 /// are too short to accommodate an AVX2 vector.) 42 /// 43 /// If either SSE2 or AVX2 is unavailable in the current environment, then 44 /// `None` is returned. 45 #[inline] new(needle: u8) -> Option<One>46 pub fn new(needle: u8) -> Option<One> { 47 if One::is_available() { 48 // SAFETY: we check that sse2 and avx2 are available above. 49 unsafe { Some(One::new_unchecked(needle)) } 50 } else { 51 None 52 } 53 } 54 55 /// Create a new finder specific to AVX2 vectors and routines without 56 /// checking that either SSE2 or AVX2 is available. 57 /// 58 /// # Safety 59 /// 60 /// Callers must guarantee that it is safe to execute both `sse2` and 61 /// `avx2` instructions in the current environment. 62 /// 63 /// Note that it is a common misconception that if one compiles for an 64 /// `x86_64` target, then they therefore automatically have access to SSE2 65 /// instructions. While this is almost always the case, it isn't true in 66 /// 100% of cases. 67 #[target_feature(enable = "sse2", enable = "avx2")] 68 #[inline] new_unchecked(needle: u8) -> One69 pub unsafe fn new_unchecked(needle: u8) -> One { 70 One { 71 sse2: generic::One::new(needle), 72 avx2: generic::One::new(needle), 73 } 74 } 75 76 /// Returns true when this implementation is available in the current 77 /// environment. 78 /// 79 /// When this is true, it is guaranteed that [`One::new`] will return 80 /// a `Some` value. Similarly, when it is false, it is guaranteed that 81 /// `One::new` will return a `None` value. 82 /// 83 /// Note also that for the lifetime of a single program, if this returns 84 /// true then it will always return true. 85 #[inline] is_available() -> bool86 pub fn is_available() -> bool { 87 #[cfg(not(target_feature = "sse2"))] 88 { 89 false 90 } 91 #[cfg(target_feature = "sse2")] 92 { 93 #[cfg(target_feature = "avx2")] 94 { 95 true 96 } 97 #[cfg(not(target_feature = "avx2"))] 98 { 99 #[cfg(feature = "std")] 100 { 101 std::is_x86_feature_detected!("avx2") 102 } 103 #[cfg(not(feature = "std"))] 104 { 105 false 106 } 107 } 108 } 109 } 110 111 /// Return the first occurrence of one of the needle bytes in the given 112 /// haystack. If no such occurrence exists, then `None` is returned. 113 /// 114 /// The occurrence is reported as an offset into `haystack`. Its maximum 115 /// value is `haystack.len() - 1`. 116 #[inline] find(&self, haystack: &[u8]) -> Option<usize>117 pub fn find(&self, haystack: &[u8]) -> Option<usize> { 118 // SAFETY: `find_raw` guarantees that if a pointer is returned, it 119 // falls within the bounds of the start and end pointers. 120 unsafe { 121 generic::search_slice_with_raw(haystack, |s, e| { 122 self.find_raw(s, e) 123 }) 124 } 125 } 126 127 /// Return the last occurrence of one of the needle bytes in the given 128 /// haystack. If no such occurrence exists, then `None` is returned. 129 /// 130 /// The occurrence is reported as an offset into `haystack`. Its maximum 131 /// value is `haystack.len() - 1`. 132 #[inline] rfind(&self, haystack: &[u8]) -> Option<usize>133 pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { 134 // SAFETY: `find_raw` guarantees that if a pointer is returned, it 135 // falls within the bounds of the start and end pointers. 136 unsafe { 137 generic::search_slice_with_raw(haystack, |s, e| { 138 self.rfind_raw(s, e) 139 }) 140 } 141 } 142 143 /// Counts all occurrences of this byte in the given haystack. 144 #[inline] count(&self, haystack: &[u8]) -> usize145 pub fn count(&self, haystack: &[u8]) -> usize { 146 // SAFETY: All of our pointers are derived directly from a borrowed 147 // slice, which is guaranteed to be valid. 148 unsafe { 149 let start = haystack.as_ptr(); 150 let end = start.add(haystack.len()); 151 self.count_raw(start, end) 152 } 153 } 154 155 /// Like `find`, but accepts and returns raw pointers. 156 /// 157 /// When a match is found, the pointer returned is guaranteed to be 158 /// `>= start` and `< end`. 159 /// 160 /// This routine is useful if you're already using raw pointers and would 161 /// like to avoid converting back to a slice before executing a search. 162 /// 163 /// # Safety 164 /// 165 /// * Both `start` and `end` must be valid for reads. 166 /// * Both `start` and `end` must point to an initialized value. 167 /// * Both `start` and `end` must point to the same allocated object and 168 /// must either be in bounds or at most one byte past the end of the 169 /// allocated object. 170 /// * Both `start` and `end` must be _derived from_ a pointer to the same 171 /// object. 172 /// * The distance between `start` and `end` must not overflow `isize`. 173 /// * The distance being in bounds must not rely on "wrapping around" the 174 /// address space. 175 /// 176 /// Note that callers may pass a pair of pointers such that `start >= end`. 177 /// In that case, `None` will always be returned. 178 #[inline] find_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>179 pub unsafe fn find_raw( 180 &self, 181 start: *const u8, 182 end: *const u8, 183 ) -> Option<*const u8> { 184 if start >= end { 185 return None; 186 } 187 let len = end.distance(start); 188 if len < __m256i::BYTES { 189 return if len < __m128i::BYTES { 190 // SAFETY: We require the caller to pass valid start/end 191 // pointers. 192 generic::fwd_byte_by_byte(start, end, |b| { 193 b == self.sse2.needle1() 194 }) 195 } else { 196 // SAFETY: We require the caller to pass valid start/end 197 // pointers. 198 self.find_raw_sse2(start, end) 199 }; 200 } 201 // SAFETY: Building a `One` means it's safe to call both 'sse2' and 202 // 'avx2' routines. Also, we've checked that our haystack is big 203 // enough to run on the vector routine. Pointer validity is caller's 204 // responsibility. 205 // 206 // Note that we could call `self.avx2.find_raw` directly here. But that 207 // means we'd have to annotate this routine with `target_feature`. 208 // Which is fine, because this routine is `unsafe` anyway and the 209 // `target_feature` obligation is met by virtue of building a `One`. 210 // The real problem is that a routine with a `target_feature` 211 // annotation generally can't be inlined into caller code unless 212 // the caller code has the same target feature annotations. Namely, 213 // the common case (at time of writing) is for calling code to not 214 // have the `avx2` target feature enabled *at compile time*. Without 215 // `target_feature` on this routine, it can be inlined which will 216 // handle some of the short-haystack cases above without touching the 217 // architecture specific code. 218 self.find_raw_avx2(start, end) 219 } 220 221 /// Like `rfind`, but accepts and returns raw pointers. 222 /// 223 /// When a match is found, the pointer returned is guaranteed to be 224 /// `>= start` and `< end`. 225 /// 226 /// This routine is useful if you're already using raw pointers and would 227 /// like to avoid converting back to a slice before executing a search. 228 /// 229 /// # Safety 230 /// 231 /// * Both `start` and `end` must be valid for reads. 232 /// * Both `start` and `end` must point to an initialized value. 233 /// * Both `start` and `end` must point to the same allocated object and 234 /// must either be in bounds or at most one byte past the end of the 235 /// allocated object. 236 /// * Both `start` and `end` must be _derived from_ a pointer to the same 237 /// object. 238 /// * The distance between `start` and `end` must not overflow `isize`. 239 /// * The distance being in bounds must not rely on "wrapping around" the 240 /// address space. 241 /// 242 /// Note that callers may pass a pair of pointers such that `start >= end`. 243 /// In that case, `None` will always be returned. 244 #[inline] rfind_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>245 pub unsafe fn rfind_raw( 246 &self, 247 start: *const u8, 248 end: *const u8, 249 ) -> Option<*const u8> { 250 if start >= end { 251 return None; 252 } 253 let len = end.distance(start); 254 if len < __m256i::BYTES { 255 return if len < __m128i::BYTES { 256 // SAFETY: We require the caller to pass valid start/end 257 // pointers. 258 generic::rev_byte_by_byte(start, end, |b| { 259 b == self.sse2.needle1() 260 }) 261 } else { 262 // SAFETY: We require the caller to pass valid start/end 263 // pointers. 264 self.rfind_raw_sse2(start, end) 265 }; 266 } 267 // SAFETY: Building a `One` means it's safe to call both 'sse2' and 268 // 'avx2' routines. Also, we've checked that our haystack is big 269 // enough to run on the vector routine. Pointer validity is caller's 270 // responsibility. 271 // 272 // See note in forward routine above for why we don't just call 273 // `self.avx2.rfind_raw` directly here. 274 self.rfind_raw_avx2(start, end) 275 } 276 277 /// Counts all occurrences of this byte in the given haystack represented 278 /// by raw pointers. 279 /// 280 /// This routine is useful if you're already using raw pointers and would 281 /// like to avoid converting back to a slice before executing a search. 282 /// 283 /// # Safety 284 /// 285 /// * Both `start` and `end` must be valid for reads. 286 /// * Both `start` and `end` must point to an initialized value. 287 /// * Both `start` and `end` must point to the same allocated object and 288 /// must either be in bounds or at most one byte past the end of the 289 /// allocated object. 290 /// * Both `start` and `end` must be _derived from_ a pointer to the same 291 /// object. 292 /// * The distance between `start` and `end` must not overflow `isize`. 293 /// * The distance being in bounds must not rely on "wrapping around" the 294 /// address space. 295 /// 296 /// Note that callers may pass a pair of pointers such that `start >= end`. 297 /// In that case, `0` will always be returned. 298 #[inline] count_raw(&self, start: *const u8, end: *const u8) -> usize299 pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize { 300 if start >= end { 301 return 0; 302 } 303 let len = end.distance(start); 304 if len < __m256i::BYTES { 305 return if len < __m128i::BYTES { 306 // SAFETY: We require the caller to pass valid start/end 307 // pointers. 308 generic::count_byte_by_byte(start, end, |b| { 309 b == self.sse2.needle1() 310 }) 311 } else { 312 // SAFETY: We require the caller to pass valid start/end 313 // pointers. 314 self.count_raw_sse2(start, end) 315 }; 316 } 317 // SAFETY: Building a `One` means it's safe to call both 'sse2' and 318 // 'avx2' routines. Also, we've checked that our haystack is big 319 // enough to run on the vector routine. Pointer validity is caller's 320 // responsibility. 321 self.count_raw_avx2(start, end) 322 } 323 324 /// Execute a search using SSE2 vectors and routines. 325 /// 326 /// # Safety 327 /// 328 /// Same as [`One::find_raw`], except the distance between `start` and 329 /// `end` must be at least the size of an SSE2 vector (in bytes). 330 /// 331 /// (The target feature safety obligation is automatically fulfilled by 332 /// virtue of being a method on `One`, which can only be constructed 333 /// when it is safe to call `sse2`/`avx2` routines.) 334 #[target_feature(enable = "sse2")] 335 #[inline] find_raw_sse2( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>336 unsafe fn find_raw_sse2( 337 &self, 338 start: *const u8, 339 end: *const u8, 340 ) -> Option<*const u8> { 341 self.sse2.find_raw(start, end) 342 } 343 344 /// Execute a search using SSE2 vectors and routines. 345 /// 346 /// # Safety 347 /// 348 /// Same as [`One::rfind_raw`], except the distance between `start` and 349 /// `end` must be at least the size of an SSE2 vector (in bytes). 350 /// 351 /// (The target feature safety obligation is automatically fulfilled by 352 /// virtue of being a method on `One`, which can only be constructed 353 /// when it is safe to call `sse2`/`avx2` routines.) 354 #[target_feature(enable = "sse2")] 355 #[inline] rfind_raw_sse2( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>356 unsafe fn rfind_raw_sse2( 357 &self, 358 start: *const u8, 359 end: *const u8, 360 ) -> Option<*const u8> { 361 self.sse2.rfind_raw(start, end) 362 } 363 364 /// Execute a count using SSE2 vectors and routines. 365 /// 366 /// # Safety 367 /// 368 /// Same as [`One::count_raw`], except the distance between `start` and 369 /// `end` must be at least the size of an SSE2 vector (in bytes). 370 /// 371 /// (The target feature safety obligation is automatically fulfilled by 372 /// virtue of being a method on `One`, which can only be constructed 373 /// when it is safe to call `sse2`/`avx2` routines.) 374 #[target_feature(enable = "sse2")] 375 #[inline] count_raw_sse2( &self, start: *const u8, end: *const u8, ) -> usize376 unsafe fn count_raw_sse2( 377 &self, 378 start: *const u8, 379 end: *const u8, 380 ) -> usize { 381 self.sse2.count_raw(start, end) 382 } 383 384 /// Execute a search using AVX2 vectors and routines. 385 /// 386 /// # Safety 387 /// 388 /// Same as [`One::find_raw`], except the distance between `start` and 389 /// `end` must be at least the size of an AVX2 vector (in bytes). 390 /// 391 /// (The target feature safety obligation is automatically fulfilled by 392 /// virtue of being a method on `One`, which can only be constructed 393 /// when it is safe to call `sse2`/`avx2` routines.) 394 #[target_feature(enable = "avx2")] 395 #[inline] find_raw_avx2( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>396 unsafe fn find_raw_avx2( 397 &self, 398 start: *const u8, 399 end: *const u8, 400 ) -> Option<*const u8> { 401 self.avx2.find_raw(start, end) 402 } 403 404 /// Execute a search using AVX2 vectors and routines. 405 /// 406 /// # Safety 407 /// 408 /// Same as [`One::rfind_raw`], except the distance between `start` and 409 /// `end` must be at least the size of an AVX2 vector (in bytes). 410 /// 411 /// (The target feature safety obligation is automatically fulfilled by 412 /// virtue of being a method on `One`, which can only be constructed 413 /// when it is safe to call `sse2`/`avx2` routines.) 414 #[target_feature(enable = "avx2")] 415 #[inline] rfind_raw_avx2( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>416 unsafe fn rfind_raw_avx2( 417 &self, 418 start: *const u8, 419 end: *const u8, 420 ) -> Option<*const u8> { 421 self.avx2.rfind_raw(start, end) 422 } 423 424 /// Execute a count using AVX2 vectors and routines. 425 /// 426 /// # Safety 427 /// 428 /// Same as [`One::count_raw`], except the distance between `start` and 429 /// `end` must be at least the size of an AVX2 vector (in bytes). 430 /// 431 /// (The target feature safety obligation is automatically fulfilled by 432 /// virtue of being a method on `One`, which can only be constructed 433 /// when it is safe to call `sse2`/`avx2` routines.) 434 #[target_feature(enable = "avx2")] 435 #[inline] count_raw_avx2( &self, start: *const u8, end: *const u8, ) -> usize436 unsafe fn count_raw_avx2( 437 &self, 438 start: *const u8, 439 end: *const u8, 440 ) -> usize { 441 self.avx2.count_raw(start, end) 442 } 443 444 /// Returns an iterator over all occurrences of the needle byte in the 445 /// given haystack. 446 /// 447 /// The iterator returned implements `DoubleEndedIterator`. This means it 448 /// can also be used to find occurrences in reverse order. 449 #[inline] iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h>450 pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> { 451 OneIter { searcher: self, it: generic::Iter::new(haystack) } 452 } 453 } 454 455 /// An iterator over all occurrences of a single byte in a haystack. 456 /// 457 /// This iterator implements `DoubleEndedIterator`, which means it can also be 458 /// used to find occurrences in reverse order. 459 /// 460 /// This iterator is created by the [`One::iter`] method. 461 /// 462 /// The lifetime parameters are as follows: 463 /// 464 /// * `'a` refers to the lifetime of the underlying [`One`] searcher. 465 /// * `'h` refers to the lifetime of the haystack being searched. 466 #[derive(Clone, Debug)] 467 pub struct OneIter<'a, 'h> { 468 searcher: &'a One, 469 it: generic::Iter<'h>, 470 } 471 472 impl<'a, 'h> Iterator for OneIter<'a, 'h> { 473 type Item = usize; 474 475 #[inline] next(&mut self) -> Option<usize>476 fn next(&mut self) -> Option<usize> { 477 // SAFETY: We rely on the generic iterator to provide valid start 478 // and end pointers, but we guarantee that any pointer returned by 479 // 'find_raw' falls within the bounds of the start and end pointer. 480 unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } 481 } 482 483 #[inline] count(self) -> usize484 fn count(self) -> usize { 485 self.it.count(|s, e| { 486 // SAFETY: We rely on our generic iterator to return valid start 487 // and end pointers. 488 unsafe { self.searcher.count_raw(s, e) } 489 }) 490 } 491 492 #[inline] size_hint(&self) -> (usize, Option<usize>)493 fn size_hint(&self) -> (usize, Option<usize>) { 494 self.it.size_hint() 495 } 496 } 497 498 impl<'a, 'h> DoubleEndedIterator for OneIter<'a, 'h> { 499 #[inline] next_back(&mut self) -> Option<usize>500 fn next_back(&mut self) -> Option<usize> { 501 // SAFETY: We rely on the generic iterator to provide valid start 502 // and end pointers, but we guarantee that any pointer returned by 503 // 'rfind_raw' falls within the bounds of the start and end pointer. 504 unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } 505 } 506 } 507 508 impl<'a, 'h> core::iter::FusedIterator for OneIter<'a, 'h> {} 509 510 /// Finds all occurrences of two bytes in a haystack. 511 /// 512 /// That is, this reports matches of one of two possible bytes. For example, 513 /// searching for `a` or `b` in `afoobar` would report matches at offsets `0`, 514 /// `4` and `5`. 515 #[derive(Clone, Copy, Debug)] 516 pub struct Two { 517 /// Used for haystacks less than 32 bytes. 518 sse2: generic::Two<__m128i>, 519 /// Used for haystacks bigger than 32 bytes. 520 avx2: generic::Two<__m256i>, 521 } 522 523 impl Two { 524 /// Create a new searcher that finds occurrences of the needle bytes given. 525 /// 526 /// This particular searcher is specialized to use AVX2 vector instructions 527 /// that typically make it quite fast. (SSE2 is used for haystacks that 528 /// are too short to accommodate an AVX2 vector.) 529 /// 530 /// If either SSE2 or AVX2 is unavailable in the current environment, then 531 /// `None` is returned. 532 #[inline] new(needle1: u8, needle2: u8) -> Option<Two>533 pub fn new(needle1: u8, needle2: u8) -> Option<Two> { 534 if Two::is_available() { 535 // SAFETY: we check that sse2 and avx2 are available above. 536 unsafe { Some(Two::new_unchecked(needle1, needle2)) } 537 } else { 538 None 539 } 540 } 541 542 /// Create a new finder specific to AVX2 vectors and routines without 543 /// checking that either SSE2 or AVX2 is available. 544 /// 545 /// # Safety 546 /// 547 /// Callers must guarantee that it is safe to execute both `sse2` and 548 /// `avx2` instructions in the current environment. 549 /// 550 /// Note that it is a common misconception that if one compiles for an 551 /// `x86_64` target, then they therefore automatically have access to SSE2 552 /// instructions. While this is almost always the case, it isn't true in 553 /// 100% of cases. 554 #[target_feature(enable = "sse2", enable = "avx2")] 555 #[inline] new_unchecked(needle1: u8, needle2: u8) -> Two556 pub unsafe fn new_unchecked(needle1: u8, needle2: u8) -> Two { 557 Two { 558 sse2: generic::Two::new(needle1, needle2), 559 avx2: generic::Two::new(needle1, needle2), 560 } 561 } 562 563 /// Returns true when this implementation is available in the current 564 /// environment. 565 /// 566 /// When this is true, it is guaranteed that [`Two::new`] will return 567 /// a `Some` value. Similarly, when it is false, it is guaranteed that 568 /// `Two::new` will return a `None` value. 569 /// 570 /// Note also that for the lifetime of a single program, if this returns 571 /// true then it will always return true. 572 #[inline] is_available() -> bool573 pub fn is_available() -> bool { 574 #[cfg(not(target_feature = "sse2"))] 575 { 576 false 577 } 578 #[cfg(target_feature = "sse2")] 579 { 580 #[cfg(target_feature = "avx2")] 581 { 582 true 583 } 584 #[cfg(not(target_feature = "avx2"))] 585 { 586 #[cfg(feature = "std")] 587 { 588 std::is_x86_feature_detected!("avx2") 589 } 590 #[cfg(not(feature = "std"))] 591 { 592 false 593 } 594 } 595 } 596 } 597 598 /// Return the first occurrence of one of the needle bytes in the given 599 /// haystack. If no such occurrence exists, then `None` is returned. 600 /// 601 /// The occurrence is reported as an offset into `haystack`. Its maximum 602 /// value is `haystack.len() - 1`. 603 #[inline] find(&self, haystack: &[u8]) -> Option<usize>604 pub fn find(&self, haystack: &[u8]) -> Option<usize> { 605 // SAFETY: `find_raw` guarantees that if a pointer is returned, it 606 // falls within the bounds of the start and end pointers. 607 unsafe { 608 generic::search_slice_with_raw(haystack, |s, e| { 609 self.find_raw(s, e) 610 }) 611 } 612 } 613 614 /// Return the last occurrence of one of the needle bytes in the given 615 /// haystack. If no such occurrence exists, then `None` is returned. 616 /// 617 /// The occurrence is reported as an offset into `haystack`. Its maximum 618 /// value is `haystack.len() - 1`. 619 #[inline] rfind(&self, haystack: &[u8]) -> Option<usize>620 pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { 621 // SAFETY: `find_raw` guarantees that if a pointer is returned, it 622 // falls within the bounds of the start and end pointers. 623 unsafe { 624 generic::search_slice_with_raw(haystack, |s, e| { 625 self.rfind_raw(s, e) 626 }) 627 } 628 } 629 630 /// Like `find`, but accepts and returns raw pointers. 631 /// 632 /// When a match is found, the pointer returned is guaranteed to be 633 /// `>= start` and `< end`. 634 /// 635 /// This routine is useful if you're already using raw pointers and would 636 /// like to avoid converting back to a slice before executing a search. 637 /// 638 /// # Safety 639 /// 640 /// * Both `start` and `end` must be valid for reads. 641 /// * Both `start` and `end` must point to an initialized value. 642 /// * Both `start` and `end` must point to the same allocated object and 643 /// must either be in bounds or at most one byte past the end of the 644 /// allocated object. 645 /// * Both `start` and `end` must be _derived from_ a pointer to the same 646 /// object. 647 /// * The distance between `start` and `end` must not overflow `isize`. 648 /// * The distance being in bounds must not rely on "wrapping around" the 649 /// address space. 650 /// 651 /// Note that callers may pass a pair of pointers such that `start >= end`. 652 /// In that case, `None` will always be returned. 653 #[inline] find_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>654 pub unsafe fn find_raw( 655 &self, 656 start: *const u8, 657 end: *const u8, 658 ) -> Option<*const u8> { 659 if start >= end { 660 return None; 661 } 662 let len = end.distance(start); 663 if len < __m256i::BYTES { 664 return if len < __m128i::BYTES { 665 // SAFETY: We require the caller to pass valid start/end 666 // pointers. 667 generic::fwd_byte_by_byte(start, end, |b| { 668 b == self.sse2.needle1() || b == self.sse2.needle2() 669 }) 670 } else { 671 // SAFETY: We require the caller to pass valid start/end 672 // pointers. 673 self.find_raw_sse2(start, end) 674 }; 675 } 676 // SAFETY: Building a `Two` means it's safe to call both 'sse2' and 677 // 'avx2' routines. Also, we've checked that our haystack is big 678 // enough to run on the vector routine. Pointer validity is caller's 679 // responsibility. 680 // 681 // Note that we could call `self.avx2.find_raw` directly here. But that 682 // means we'd have to annotate this routine with `target_feature`. 683 // Which is fine, because this routine is `unsafe` anyway and the 684 // `target_feature` obligation is met by virtue of building a `Two`. 685 // The real problem is that a routine with a `target_feature` 686 // annotation generally can't be inlined into caller code unless 687 // the caller code has the same target feature annotations. Namely, 688 // the common case (at time of writing) is for calling code to not 689 // have the `avx2` target feature enabled *at compile time*. Without 690 // `target_feature` on this routine, it can be inlined which will 691 // handle some of the short-haystack cases above without touching the 692 // architecture specific code. 693 self.find_raw_avx2(start, end) 694 } 695 696 /// Like `rfind`, but accepts and returns raw pointers. 697 /// 698 /// When a match is found, the pointer returned is guaranteed to be 699 /// `>= start` and `< end`. 700 /// 701 /// This routine is useful if you're already using raw pointers and would 702 /// like to avoid converting back to a slice before executing a search. 703 /// 704 /// # Safety 705 /// 706 /// * Both `start` and `end` must be valid for reads. 707 /// * Both `start` and `end` must point to an initialized value. 708 /// * Both `start` and `end` must point to the same allocated object and 709 /// must either be in bounds or at most one byte past the end of the 710 /// allocated object. 711 /// * Both `start` and `end` must be _derived from_ a pointer to the same 712 /// object. 713 /// * The distance between `start` and `end` must not overflow `isize`. 714 /// * The distance being in bounds must not rely on "wrapping around" the 715 /// address space. 716 /// 717 /// Note that callers may pass a pair of pointers such that `start >= end`. 718 /// In that case, `None` will always be returned. 719 #[inline] rfind_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>720 pub unsafe fn rfind_raw( 721 &self, 722 start: *const u8, 723 end: *const u8, 724 ) -> Option<*const u8> { 725 if start >= end { 726 return None; 727 } 728 let len = end.distance(start); 729 if len < __m256i::BYTES { 730 return if len < __m128i::BYTES { 731 // SAFETY: We require the caller to pass valid start/end 732 // pointers. 733 generic::rev_byte_by_byte(start, end, |b| { 734 b == self.sse2.needle1() || b == self.sse2.needle2() 735 }) 736 } else { 737 // SAFETY: We require the caller to pass valid start/end 738 // pointers. 739 self.rfind_raw_sse2(start, end) 740 }; 741 } 742 // SAFETY: Building a `Two` means it's safe to call both 'sse2' and 743 // 'avx2' routines. Also, we've checked that our haystack is big 744 // enough to run on the vector routine. Pointer validity is caller's 745 // responsibility. 746 // 747 // See note in forward routine above for why we don't just call 748 // `self.avx2.rfind_raw` directly here. 749 self.rfind_raw_avx2(start, end) 750 } 751 752 /// Execute a search using SSE2 vectors and routines. 753 /// 754 /// # Safety 755 /// 756 /// Same as [`Two::find_raw`], except the distance between `start` and 757 /// `end` must be at least the size of an SSE2 vector (in bytes). 758 /// 759 /// (The target feature safety obligation is automatically fulfilled by 760 /// virtue of being a method on `Two`, which can only be constructed 761 /// when it is safe to call `sse2`/`avx2` routines.) 762 #[target_feature(enable = "sse2")] 763 #[inline] find_raw_sse2( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>764 unsafe fn find_raw_sse2( 765 &self, 766 start: *const u8, 767 end: *const u8, 768 ) -> Option<*const u8> { 769 self.sse2.find_raw(start, end) 770 } 771 772 /// Execute a search using SSE2 vectors and routines. 773 /// 774 /// # Safety 775 /// 776 /// Same as [`Two::rfind_raw`], except the distance between `start` and 777 /// `end` must be at least the size of an SSE2 vector (in bytes). 778 /// 779 /// (The target feature safety obligation is automatically fulfilled by 780 /// virtue of being a method on `Two`, which can only be constructed 781 /// when it is safe to call `sse2`/`avx2` routines.) 782 #[target_feature(enable = "sse2")] 783 #[inline] rfind_raw_sse2( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>784 unsafe fn rfind_raw_sse2( 785 &self, 786 start: *const u8, 787 end: *const u8, 788 ) -> Option<*const u8> { 789 self.sse2.rfind_raw(start, end) 790 } 791 792 /// Execute a search using AVX2 vectors and routines. 793 /// 794 /// # Safety 795 /// 796 /// Same as [`Two::find_raw`], except the distance between `start` and 797 /// `end` must be at least the size of an AVX2 vector (in bytes). 798 /// 799 /// (The target feature safety obligation is automatically fulfilled by 800 /// virtue of being a method on `Two`, which can only be constructed 801 /// when it is safe to call `sse2`/`avx2` routines.) 802 #[target_feature(enable = "avx2")] 803 #[inline] find_raw_avx2( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>804 unsafe fn find_raw_avx2( 805 &self, 806 start: *const u8, 807 end: *const u8, 808 ) -> Option<*const u8> { 809 self.avx2.find_raw(start, end) 810 } 811 812 /// Execute a search using AVX2 vectors and routines. 813 /// 814 /// # Safety 815 /// 816 /// Same as [`Two::rfind_raw`], except the distance between `start` and 817 /// `end` must be at least the size of an AVX2 vector (in bytes). 818 /// 819 /// (The target feature safety obligation is automatically fulfilled by 820 /// virtue of being a method on `Two`, which can only be constructed 821 /// when it is safe to call `sse2`/`avx2` routines.) 822 #[target_feature(enable = "avx2")] 823 #[inline] rfind_raw_avx2( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>824 unsafe fn rfind_raw_avx2( 825 &self, 826 start: *const u8, 827 end: *const u8, 828 ) -> Option<*const u8> { 829 self.avx2.rfind_raw(start, end) 830 } 831 832 /// Returns an iterator over all occurrences of the needle bytes in the 833 /// given haystack. 834 /// 835 /// The iterator returned implements `DoubleEndedIterator`. This means it 836 /// can also be used to find occurrences in reverse order. 837 #[inline] iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h>838 pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> { 839 TwoIter { searcher: self, it: generic::Iter::new(haystack) } 840 } 841 } 842 843 /// An iterator over all occurrences of two possible bytes in a haystack. 844 /// 845 /// This iterator implements `DoubleEndedIterator`, which means it can also be 846 /// used to find occurrences in reverse order. 847 /// 848 /// This iterator is created by the [`Two::iter`] method. 849 /// 850 /// The lifetime parameters are as follows: 851 /// 852 /// * `'a` refers to the lifetime of the underlying [`Two`] searcher. 853 /// * `'h` refers to the lifetime of the haystack being searched. 854 #[derive(Clone, Debug)] 855 pub struct TwoIter<'a, 'h> { 856 searcher: &'a Two, 857 it: generic::Iter<'h>, 858 } 859 860 impl<'a, 'h> Iterator for TwoIter<'a, 'h> { 861 type Item = usize; 862 863 #[inline] next(&mut self) -> Option<usize>864 fn next(&mut self) -> Option<usize> { 865 // SAFETY: We rely on the generic iterator to provide valid start 866 // and end pointers, but we guarantee that any pointer returned by 867 // 'find_raw' falls within the bounds of the start and end pointer. 868 unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } 869 } 870 871 #[inline] size_hint(&self) -> (usize, Option<usize>)872 fn size_hint(&self) -> (usize, Option<usize>) { 873 self.it.size_hint() 874 } 875 } 876 877 impl<'a, 'h> DoubleEndedIterator for TwoIter<'a, 'h> { 878 #[inline] next_back(&mut self) -> Option<usize>879 fn next_back(&mut self) -> Option<usize> { 880 // SAFETY: We rely on the generic iterator to provide valid start 881 // and end pointers, but we guarantee that any pointer returned by 882 // 'rfind_raw' falls within the bounds of the start and end pointer. 883 unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } 884 } 885 } 886 887 impl<'a, 'h> core::iter::FusedIterator for TwoIter<'a, 'h> {} 888 889 /// Finds all occurrences of three bytes in a haystack. 890 /// 891 /// That is, this reports matches of one of three possible bytes. For example, 892 /// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets 893 /// `0`, `2`, `3`, `4` and `5`. 894 #[derive(Clone, Copy, Debug)] 895 pub struct Three { 896 /// Used for haystacks less than 32 bytes. 897 sse2: generic::Three<__m128i>, 898 /// Used for haystacks bigger than 32 bytes. 899 avx2: generic::Three<__m256i>, 900 } 901 902 impl Three { 903 /// Create a new searcher that finds occurrences of the needle bytes given. 904 /// 905 /// This particular searcher is specialized to use AVX2 vector instructions 906 /// that typically make it quite fast. (SSE2 is used for haystacks that 907 /// are too short to accommodate an AVX2 vector.) 908 /// 909 /// If either SSE2 or AVX2 is unavailable in the current environment, then 910 /// `None` is returned. 911 #[inline] new(needle1: u8, needle2: u8, needle3: u8) -> Option<Three>912 pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Option<Three> { 913 if Three::is_available() { 914 // SAFETY: we check that sse2 and avx2 are available above. 915 unsafe { Some(Three::new_unchecked(needle1, needle2, needle3)) } 916 } else { 917 None 918 } 919 } 920 921 /// Create a new finder specific to AVX2 vectors and routines without 922 /// checking that either SSE2 or AVX2 is available. 923 /// 924 /// # Safety 925 /// 926 /// Callers must guarantee that it is safe to execute both `sse2` and 927 /// `avx2` instructions in the current environment. 928 /// 929 /// Note that it is a common misconception that if one compiles for an 930 /// `x86_64` target, then they therefore automatically have access to SSE2 931 /// instructions. While this is almost always the case, it isn't true in 932 /// 100% of cases. 933 #[target_feature(enable = "sse2", enable = "avx2")] 934 #[inline] new_unchecked( needle1: u8, needle2: u8, needle3: u8, ) -> Three935 pub unsafe fn new_unchecked( 936 needle1: u8, 937 needle2: u8, 938 needle3: u8, 939 ) -> Three { 940 Three { 941 sse2: generic::Three::new(needle1, needle2, needle3), 942 avx2: generic::Three::new(needle1, needle2, needle3), 943 } 944 } 945 946 /// Returns true when this implementation is available in the current 947 /// environment. 948 /// 949 /// When this is true, it is guaranteed that [`Three::new`] will return 950 /// a `Some` value. Similarly, when it is false, it is guaranteed that 951 /// `Three::new` will return a `None` value. 952 /// 953 /// Note also that for the lifetime of a single program, if this returns 954 /// true then it will always return true. 955 #[inline] is_available() -> bool956 pub fn is_available() -> bool { 957 #[cfg(not(target_feature = "sse2"))] 958 { 959 false 960 } 961 #[cfg(target_feature = "sse2")] 962 { 963 #[cfg(target_feature = "avx2")] 964 { 965 true 966 } 967 #[cfg(not(target_feature = "avx2"))] 968 { 969 #[cfg(feature = "std")] 970 { 971 std::is_x86_feature_detected!("avx2") 972 } 973 #[cfg(not(feature = "std"))] 974 { 975 false 976 } 977 } 978 } 979 } 980 981 /// Return the first occurrence of one of the needle bytes in the given 982 /// haystack. If no such occurrence exists, then `None` is returned. 983 /// 984 /// The occurrence is reported as an offset into `haystack`. Its maximum 985 /// value is `haystack.len() - 1`. 986 #[inline] find(&self, haystack: &[u8]) -> Option<usize>987 pub fn find(&self, haystack: &[u8]) -> Option<usize> { 988 // SAFETY: `find_raw` guarantees that if a pointer is returned, it 989 // falls within the bounds of the start and end pointers. 990 unsafe { 991 generic::search_slice_with_raw(haystack, |s, e| { 992 self.find_raw(s, e) 993 }) 994 } 995 } 996 997 /// Return the last occurrence of one of the needle bytes in the given 998 /// haystack. If no such occurrence exists, then `None` is returned. 999 /// 1000 /// The occurrence is reported as an offset into `haystack`. Its maximum 1001 /// value is `haystack.len() - 1`. 1002 #[inline] rfind(&self, haystack: &[u8]) -> Option<usize>1003 pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { 1004 // SAFETY: `find_raw` guarantees that if a pointer is returned, it 1005 // falls within the bounds of the start and end pointers. 1006 unsafe { 1007 generic::search_slice_with_raw(haystack, |s, e| { 1008 self.rfind_raw(s, e) 1009 }) 1010 } 1011 } 1012 1013 /// Like `find`, but accepts and returns raw pointers. 1014 /// 1015 /// When a match is found, the pointer returned is guaranteed to be 1016 /// `>= start` and `< end`. 1017 /// 1018 /// This routine is useful if you're already using raw pointers and would 1019 /// like to avoid converting back to a slice before executing a search. 1020 /// 1021 /// # Safety 1022 /// 1023 /// * Both `start` and `end` must be valid for reads. 1024 /// * Both `start` and `end` must point to an initialized value. 1025 /// * Both `start` and `end` must point to the same allocated object and 1026 /// must either be in bounds or at most one byte past the end of the 1027 /// allocated object. 1028 /// * Both `start` and `end` must be _derived from_ a pointer to the same 1029 /// object. 1030 /// * The distance between `start` and `end` must not overflow `isize`. 1031 /// * The distance being in bounds must not rely on "wrapping around" the 1032 /// address space. 1033 /// 1034 /// Note that callers may pass a pair of pointers such that `start >= end`. 1035 /// In that case, `None` will always be returned. 1036 #[inline] find_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>1037 pub unsafe fn find_raw( 1038 &self, 1039 start: *const u8, 1040 end: *const u8, 1041 ) -> Option<*const u8> { 1042 if start >= end { 1043 return None; 1044 } 1045 let len = end.distance(start); 1046 if len < __m256i::BYTES { 1047 return if len < __m128i::BYTES { 1048 // SAFETY: We require the caller to pass valid start/end 1049 // pointers. 1050 generic::fwd_byte_by_byte(start, end, |b| { 1051 b == self.sse2.needle1() 1052 || b == self.sse2.needle2() 1053 || b == self.sse2.needle3() 1054 }) 1055 } else { 1056 // SAFETY: We require the caller to pass valid start/end 1057 // pointers. 1058 self.find_raw_sse2(start, end) 1059 }; 1060 } 1061 // SAFETY: Building a `Three` means it's safe to call both 'sse2' and 1062 // 'avx2' routines. Also, we've checked that our haystack is big 1063 // enough to run on the vector routine. Pointer validity is caller's 1064 // responsibility. 1065 // 1066 // Note that we could call `self.avx2.find_raw` directly here. But that 1067 // means we'd have to annotate this routine with `target_feature`. 1068 // Which is fine, because this routine is `unsafe` anyway and the 1069 // `target_feature` obligation is met by virtue of building a `Three`. 1070 // The real problem is that a routine with a `target_feature` 1071 // annotation generally can't be inlined into caller code unless 1072 // the caller code has the same target feature annotations. Namely, 1073 // the common case (at time of writing) is for calling code to not 1074 // have the `avx2` target feature enabled *at compile time*. Without 1075 // `target_feature` on this routine, it can be inlined which will 1076 // handle some of the short-haystack cases above without touching the 1077 // architecture specific code. 1078 self.find_raw_avx2(start, end) 1079 } 1080 1081 /// Like `rfind`, but accepts and returns raw pointers. 1082 /// 1083 /// When a match is found, the pointer returned is guaranteed to be 1084 /// `>= start` and `< end`. 1085 /// 1086 /// This routine is useful if you're already using raw pointers and would 1087 /// like to avoid converting back to a slice before executing a search. 1088 /// 1089 /// # Safety 1090 /// 1091 /// * Both `start` and `end` must be valid for reads. 1092 /// * Both `start` and `end` must point to an initialized value. 1093 /// * Both `start` and `end` must point to the same allocated object and 1094 /// must either be in bounds or at most one byte past the end of the 1095 /// allocated object. 1096 /// * Both `start` and `end` must be _derived from_ a pointer to the same 1097 /// object. 1098 /// * The distance between `start` and `end` must not overflow `isize`. 1099 /// * The distance being in bounds must not rely on "wrapping around" the 1100 /// address space. 1101 /// 1102 /// Note that callers may pass a pair of pointers such that `start >= end`. 1103 /// In that case, `None` will always be returned. 1104 #[inline] rfind_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>1105 pub unsafe fn rfind_raw( 1106 &self, 1107 start: *const u8, 1108 end: *const u8, 1109 ) -> Option<*const u8> { 1110 if start >= end { 1111 return None; 1112 } 1113 let len = end.distance(start); 1114 if len < __m256i::BYTES { 1115 return if len < __m128i::BYTES { 1116 // SAFETY: We require the caller to pass valid start/end 1117 // pointers. 1118 generic::rev_byte_by_byte(start, end, |b| { 1119 b == self.sse2.needle1() 1120 || b == self.sse2.needle2() 1121 || b == self.sse2.needle3() 1122 }) 1123 } else { 1124 // SAFETY: We require the caller to pass valid start/end 1125 // pointers. 1126 self.rfind_raw_sse2(start, end) 1127 }; 1128 } 1129 // SAFETY: Building a `Three` means it's safe to call both 'sse2' and 1130 // 'avx2' routines. Also, we've checked that our haystack is big 1131 // enough to run on the vector routine. Pointer validity is caller's 1132 // responsibility. 1133 // 1134 // See note in forward routine above for why we don't just call 1135 // `self.avx2.rfind_raw` directly here. 1136 self.rfind_raw_avx2(start, end) 1137 } 1138 1139 /// Execute a search using SSE2 vectors and routines. 1140 /// 1141 /// # Safety 1142 /// 1143 /// Same as [`Three::find_raw`], except the distance between `start` and 1144 /// `end` must be at least the size of an SSE2 vector (in bytes). 1145 /// 1146 /// (The target feature safety obligation is automatically fulfilled by 1147 /// virtue of being a method on `Three`, which can only be constructed 1148 /// when it is safe to call `sse2`/`avx2` routines.) 1149 #[target_feature(enable = "sse2")] 1150 #[inline] find_raw_sse2( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>1151 unsafe fn find_raw_sse2( 1152 &self, 1153 start: *const u8, 1154 end: *const u8, 1155 ) -> Option<*const u8> { 1156 self.sse2.find_raw(start, end) 1157 } 1158 1159 /// Execute a search using SSE2 vectors and routines. 1160 /// 1161 /// # Safety 1162 /// 1163 /// Same as [`Three::rfind_raw`], except the distance between `start` and 1164 /// `end` must be at least the size of an SSE2 vector (in bytes). 1165 /// 1166 /// (The target feature safety obligation is automatically fulfilled by 1167 /// virtue of being a method on `Three`, which can only be constructed 1168 /// when it is safe to call `sse2`/`avx2` routines.) 1169 #[target_feature(enable = "sse2")] 1170 #[inline] rfind_raw_sse2( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>1171 unsafe fn rfind_raw_sse2( 1172 &self, 1173 start: *const u8, 1174 end: *const u8, 1175 ) -> Option<*const u8> { 1176 self.sse2.rfind_raw(start, end) 1177 } 1178 1179 /// Execute a search using AVX2 vectors and routines. 1180 /// 1181 /// # Safety 1182 /// 1183 /// Same as [`Three::find_raw`], except the distance between `start` and 1184 /// `end` must be at least the size of an AVX2 vector (in bytes). 1185 /// 1186 /// (The target feature safety obligation is automatically fulfilled by 1187 /// virtue of being a method on `Three`, which can only be constructed 1188 /// when it is safe to call `sse2`/`avx2` routines.) 1189 #[target_feature(enable = "avx2")] 1190 #[inline] find_raw_avx2( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>1191 unsafe fn find_raw_avx2( 1192 &self, 1193 start: *const u8, 1194 end: *const u8, 1195 ) -> Option<*const u8> { 1196 self.avx2.find_raw(start, end) 1197 } 1198 1199 /// Execute a search using AVX2 vectors and routines. 1200 /// 1201 /// # Safety 1202 /// 1203 /// Same as [`Three::rfind_raw`], except the distance between `start` and 1204 /// `end` must be at least the size of an AVX2 vector (in bytes). 1205 /// 1206 /// (The target feature safety obligation is automatically fulfilled by 1207 /// virtue of being a method on `Three`, which can only be constructed 1208 /// when it is safe to call `sse2`/`avx2` routines.) 1209 #[target_feature(enable = "avx2")] 1210 #[inline] rfind_raw_avx2( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>1211 unsafe fn rfind_raw_avx2( 1212 &self, 1213 start: *const u8, 1214 end: *const u8, 1215 ) -> Option<*const u8> { 1216 self.avx2.rfind_raw(start, end) 1217 } 1218 1219 /// Returns an iterator over all occurrences of the needle bytes in the 1220 /// given haystack. 1221 /// 1222 /// The iterator returned implements `DoubleEndedIterator`. This means it 1223 /// can also be used to find occurrences in reverse order. 1224 #[inline] iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h>1225 pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> { 1226 ThreeIter { searcher: self, it: generic::Iter::new(haystack) } 1227 } 1228 } 1229 1230 /// An iterator over all occurrences of three possible bytes in a haystack. 1231 /// 1232 /// This iterator implements `DoubleEndedIterator`, which means it can also be 1233 /// used to find occurrences in reverse order. 1234 /// 1235 /// This iterator is created by the [`Three::iter`] method. 1236 /// 1237 /// The lifetime parameters are as follows: 1238 /// 1239 /// * `'a` refers to the lifetime of the underlying [`Three`] searcher. 1240 /// * `'h` refers to the lifetime of the haystack being searched. 1241 #[derive(Clone, Debug)] 1242 pub struct ThreeIter<'a, 'h> { 1243 searcher: &'a Three, 1244 it: generic::Iter<'h>, 1245 } 1246 1247 impl<'a, 'h> Iterator for ThreeIter<'a, 'h> { 1248 type Item = usize; 1249 1250 #[inline] next(&mut self) -> Option<usize>1251 fn next(&mut self) -> Option<usize> { 1252 // SAFETY: We rely on the generic iterator to provide valid start 1253 // and end pointers, but we guarantee that any pointer returned by 1254 // 'find_raw' falls within the bounds of the start and end pointer. 1255 unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } 1256 } 1257 1258 #[inline] size_hint(&self) -> (usize, Option<usize>)1259 fn size_hint(&self) -> (usize, Option<usize>) { 1260 self.it.size_hint() 1261 } 1262 } 1263 1264 impl<'a, 'h> DoubleEndedIterator for ThreeIter<'a, 'h> { 1265 #[inline] next_back(&mut self) -> Option<usize>1266 fn next_back(&mut self) -> Option<usize> { 1267 // SAFETY: We rely on the generic iterator to provide valid start 1268 // and end pointers, but we guarantee that any pointer returned by 1269 // 'rfind_raw' falls within the bounds of the start and end pointer. 1270 unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } 1271 } 1272 } 1273 1274 impl<'a, 'h> core::iter::FusedIterator for ThreeIter<'a, 'h> {} 1275 1276 #[cfg(test)] 1277 mod tests { 1278 use super::*; 1279 1280 define_memchr_quickcheck!(super); 1281 1282 #[test] forward_one()1283 fn forward_one() { 1284 crate::tests::memchr::Runner::new(1).forward_iter( 1285 |haystack, needles| { 1286 Some(One::new(needles[0])?.iter(haystack).collect()) 1287 }, 1288 ) 1289 } 1290 1291 #[test] reverse_one()1292 fn reverse_one() { 1293 crate::tests::memchr::Runner::new(1).reverse_iter( 1294 |haystack, needles| { 1295 Some(One::new(needles[0])?.iter(haystack).rev().collect()) 1296 }, 1297 ) 1298 } 1299 1300 #[test] count_one()1301 fn count_one() { 1302 crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| { 1303 Some(One::new(needles[0])?.iter(haystack).count()) 1304 }) 1305 } 1306 1307 #[test] forward_two()1308 fn forward_two() { 1309 crate::tests::memchr::Runner::new(2).forward_iter( 1310 |haystack, needles| { 1311 let n1 = needles.get(0).copied()?; 1312 let n2 = needles.get(1).copied()?; 1313 Some(Two::new(n1, n2)?.iter(haystack).collect()) 1314 }, 1315 ) 1316 } 1317 1318 #[test] reverse_two()1319 fn reverse_two() { 1320 crate::tests::memchr::Runner::new(2).reverse_iter( 1321 |haystack, needles| { 1322 let n1 = needles.get(0).copied()?; 1323 let n2 = needles.get(1).copied()?; 1324 Some(Two::new(n1, n2)?.iter(haystack).rev().collect()) 1325 }, 1326 ) 1327 } 1328 1329 #[test] forward_three()1330 fn forward_three() { 1331 crate::tests::memchr::Runner::new(3).forward_iter( 1332 |haystack, needles| { 1333 let n1 = needles.get(0).copied()?; 1334 let n2 = needles.get(1).copied()?; 1335 let n3 = needles.get(2).copied()?; 1336 Some(Three::new(n1, n2, n3)?.iter(haystack).collect()) 1337 }, 1338 ) 1339 } 1340 1341 #[test] reverse_three()1342 fn reverse_three() { 1343 crate::tests::memchr::Runner::new(3).reverse_iter( 1344 |haystack, needles| { 1345 let n1 = needles.get(0).copied()?; 1346 let n2 = needles.get(1).copied()?; 1347 let n3 = needles.get(2).copied()?; 1348 Some(Three::new(n1, n2, n3)?.iter(haystack).rev().collect()) 1349 }, 1350 ) 1351 } 1352 } 1353