1 /*! 2 This module defines 128-bit vector implementations of `memchr` and friends. 3 4 The main types in this module are [`One`], [`Two`] and [`Three`]. They are for 5 searching for one, two or three distinct bytes, respectively, in a haystack. 6 Each type also has corresponding double ended iterators. These searchers are 7 typically much faster than scalar routines accomplishing the same task. 8 9 The `One` searcher also provides a [`One::count`] routine for efficiently 10 counting the number of times a single byte occurs in a haystack. This is 11 useful, for example, for counting the number of lines in a haystack. This 12 routine exists because it is usually faster, especially with a high match 13 count, then using [`One::find`] repeatedly. ([`OneIter`] specializes its 14 `Iterator::count` implementation to use this routine.) 15 16 Only one, two and three bytes are supported because three bytes is about 17 the point where one sees diminishing returns. Beyond this point and it's 18 probably (but not necessarily) better to just use a simple `[bool; 256]` array 19 or similar. However, it depends mightily on the specific work-load and the 20 expected match frequency. 21 */ 22 23 use core::arch::wasm32::v128; 24 25 use crate::{arch::generic::memchr as generic, ext::Pointer, vector::Vector}; 26 27 /// Finds all occurrences of a single byte in a haystack. 28 #[derive(Clone, Copy, Debug)] 29 pub struct One(generic::One<v128>); 30 31 impl One { 32 /// Create a new searcher that finds occurrences of the needle byte given. 33 /// 34 /// This particular searcher is specialized to use simd128 vector 35 /// instructions that typically make it quite fast. 36 /// 37 /// If simd128 is unavailable in the current environment, then `None` is 38 /// returned. 39 #[inline] new(needle: u8) -> Option<One>40 pub fn new(needle: u8) -> Option<One> { 41 if One::is_available() { 42 // SAFETY: we check that simd128 is available above. 43 unsafe { Some(One::new_unchecked(needle)) } 44 } else { 45 None 46 } 47 } 48 49 /// Create a new finder specific to simd128 vectors and routines without 50 /// checking that simd128 is available. 51 /// 52 /// # Safety 53 /// 54 /// Callers must guarantee that it is safe to execute `simd128` 55 /// instructions in the current environment. 56 #[target_feature(enable = "simd128")] 57 #[inline] new_unchecked(needle: u8) -> One58 pub unsafe fn new_unchecked(needle: u8) -> One { 59 One(generic::One::new(needle)) 60 } 61 62 /// Returns true when this implementation is available in the current 63 /// environment. 64 /// 65 /// When this is true, it is guaranteed that [`One::new`] will return 66 /// a `Some` value. Similarly, when it is false, it is guaranteed that 67 /// `One::new` will return a `None` value. 68 /// 69 /// Note also that for the lifetime of a single program, if this returns 70 /// true then it will always return true. 71 #[inline] is_available() -> bool72 pub fn is_available() -> bool { 73 #[cfg(target_feature = "simd128")] 74 { 75 true 76 } 77 #[cfg(not(target_feature = "simd128"))] 78 { 79 false 80 } 81 } 82 83 /// Return the first occurrence of one of the needle bytes in the given 84 /// haystack. If no such occurrence exists, then `None` is returned. 85 /// 86 /// The occurrence is reported as an offset into `haystack`. Its maximum 87 /// value is `haystack.len() - 1`. 88 #[inline] find(&self, haystack: &[u8]) -> Option<usize>89 pub fn find(&self, haystack: &[u8]) -> Option<usize> { 90 // SAFETY: `find_raw` guarantees that if a pointer is returned, it 91 // falls within the bounds of the start and end pointers. 92 unsafe { 93 generic::search_slice_with_raw(haystack, |s, e| { 94 self.find_raw(s, e) 95 }) 96 } 97 } 98 99 /// Return the last occurrence of one of the needle bytes in the given 100 /// haystack. If no such occurrence exists, then `None` is returned. 101 /// 102 /// The occurrence is reported as an offset into `haystack`. Its maximum 103 /// value is `haystack.len() - 1`. 104 #[inline] rfind(&self, haystack: &[u8]) -> Option<usize>105 pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { 106 // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it 107 // falls within the bounds of the start and end pointers. 108 unsafe { 109 generic::search_slice_with_raw(haystack, |s, e| { 110 self.rfind_raw(s, e) 111 }) 112 } 113 } 114 115 /// Counts all occurrences of this byte in the given haystack. 116 #[inline] count(&self, haystack: &[u8]) -> usize117 pub fn count(&self, haystack: &[u8]) -> usize { 118 // SAFETY: All of our pointers are derived directly from a borrowed 119 // slice, which is guaranteed to be valid. 120 unsafe { 121 let start = haystack.as_ptr(); 122 let end = start.add(haystack.len()); 123 self.count_raw(start, end) 124 } 125 } 126 127 /// Like `find`, but accepts and returns raw pointers. 128 /// 129 /// When a match is found, the pointer returned is guaranteed to be 130 /// `>= start` and `< end`. 131 /// 132 /// This routine is useful if you're already using raw pointers and would 133 /// like to avoid converting back to a slice before executing a search. 134 /// 135 /// # Safety 136 /// 137 /// * Both `start` and `end` must be valid for reads. 138 /// * Both `start` and `end` must point to an initialized value. 139 /// * Both `start` and `end` must point to the same allocated object and 140 /// must either be in bounds or at most one byte past the end of the 141 /// allocated object. 142 /// * Both `start` and `end` must be _derived from_ a pointer to the same 143 /// object. 144 /// * The distance between `start` and `end` must not overflow `isize`. 145 /// * The distance being in bounds must not rely on "wrapping around" the 146 /// address space. 147 /// 148 /// Note that callers may pass a pair of pointers such that `start >= end`. 149 /// In that case, `None` will always be returned. 150 #[inline] find_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>151 pub unsafe fn find_raw( 152 &self, 153 start: *const u8, 154 end: *const u8, 155 ) -> Option<*const u8> { 156 if start >= end { 157 return None; 158 } 159 if end.distance(start) < v128::BYTES { 160 // SAFETY: We require the caller to pass valid start/end pointers. 161 return generic::fwd_byte_by_byte(start, end, |b| { 162 b == self.0.needle1() 163 }); 164 } 165 // SAFETY: Building a `One` means it's safe to call 'simd128' routines. 166 // Also, we've checked that our haystack is big enough to run on the 167 // vector routine. Pointer validity is caller's responsibility. 168 self.find_raw_impl(start, end) 169 } 170 171 /// Like `rfind`, but accepts and returns raw pointers. 172 /// 173 /// When a match is found, the pointer returned is guaranteed to be 174 /// `>= start` and `< end`. 175 /// 176 /// This routine is useful if you're already using raw pointers and would 177 /// like to avoid converting back to a slice before executing a search. 178 /// 179 /// # Safety 180 /// 181 /// * Both `start` and `end` must be valid for reads. 182 /// * Both `start` and `end` must point to an initialized value. 183 /// * Both `start` and `end` must point to the same allocated object and 184 /// must either be in bounds or at most one byte past the end of the 185 /// allocated object. 186 /// * Both `start` and `end` must be _derived from_ a pointer to the same 187 /// object. 188 /// * The distance between `start` and `end` must not overflow `isize`. 189 /// * The distance being in bounds must not rely on "wrapping around" the 190 /// address space. 191 /// 192 /// Note that callers may pass a pair of pointers such that `start >= end`. 193 /// In that case, `None` will always be returned. 194 #[inline] rfind_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>195 pub unsafe fn rfind_raw( 196 &self, 197 start: *const u8, 198 end: *const u8, 199 ) -> Option<*const u8> { 200 if start >= end { 201 return None; 202 } 203 if end.distance(start) < v128::BYTES { 204 // SAFETY: We require the caller to pass valid start/end pointers. 205 return generic::rev_byte_by_byte(start, end, |b| { 206 b == self.0.needle1() 207 }); 208 } 209 // SAFETY: Building a `One` means it's safe to call 'simd128' routines. 210 // Also, we've checked that our haystack is big enough to run on the 211 // vector routine. Pointer validity is caller's responsibility. 212 self.rfind_raw_impl(start, end) 213 } 214 215 /// Counts all occurrences of this byte in the given haystack represented 216 /// by raw pointers. 217 /// 218 /// When a match is found, the pointer returned is guaranteed to be 219 /// `>= start` and `< end`. 220 /// 221 /// This routine is useful if you're already using raw pointers and would 222 /// like to avoid converting back to a slice before executing a search. 223 /// 224 /// # Safety 225 /// 226 /// * Both `start` and `end` must be valid for reads. 227 /// * Both `start` and `end` must point to an initialized value. 228 /// * Both `start` and `end` must point to the same allocated object and 229 /// must either be in bounds or at most one byte past the end of the 230 /// allocated object. 231 /// * Both `start` and `end` must be _derived from_ a pointer to the same 232 /// object. 233 /// * The distance between `start` and `end` must not overflow `isize`. 234 /// * The distance being in bounds must not rely on "wrapping around" the 235 /// address space. 236 /// 237 /// Note that callers may pass a pair of pointers such that `start >= end`. 238 /// In that case, `None` will always be returned. 239 #[inline] count_raw(&self, start: *const u8, end: *const u8) -> usize240 pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize { 241 if start >= end { 242 return 0; 243 } 244 if end.distance(start) < v128::BYTES { 245 // SAFETY: We require the caller to pass valid start/end pointers. 246 return generic::count_byte_by_byte(start, end, |b| { 247 b == self.0.needle1() 248 }); 249 } 250 // SAFETY: Building a `One` means it's safe to call 'simd128' routines. 251 // Also, we've checked that our haystack is big enough to run on the 252 // vector routine. Pointer validity is caller's responsibility. 253 self.count_raw_impl(start, end) 254 } 255 256 /// Execute a search using simd128 vectors and routines. 257 /// 258 /// # Safety 259 /// 260 /// Same as [`One::find_raw`], except the distance between `start` and 261 /// `end` must be at least the size of a simd128 vector (in bytes). 262 /// 263 /// (The target feature safety obligation is automatically fulfilled by 264 /// virtue of being a method on `One`, which can only be constructed 265 /// when it is safe to call `simd128` routines.) 266 #[target_feature(enable = "simd128")] 267 #[inline] find_raw_impl( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>268 unsafe fn find_raw_impl( 269 &self, 270 start: *const u8, 271 end: *const u8, 272 ) -> Option<*const u8> { 273 self.0.find_raw(start, end) 274 } 275 276 /// Execute a search using simd128 vectors and routines. 277 /// 278 /// # Safety 279 /// 280 /// Same as [`One::rfind_raw`], except the distance between `start` and 281 /// `end` must be at least the size of a simd128 vector (in bytes). 282 /// 283 /// (The target feature safety obligation is automatically fulfilled by 284 /// virtue of being a method on `One`, which can only be constructed 285 /// when it is safe to call `simd128` routines.) 286 #[target_feature(enable = "simd128")] 287 #[inline] rfind_raw_impl( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>288 unsafe fn rfind_raw_impl( 289 &self, 290 start: *const u8, 291 end: *const u8, 292 ) -> Option<*const u8> { 293 self.0.rfind_raw(start, end) 294 } 295 296 /// Execute a count using simd128 vectors and routines. 297 /// 298 /// # Safety 299 /// 300 /// Same as [`One::count_raw`], except the distance between `start` and 301 /// `end` must be at least the size of a simd128 vector (in bytes). 302 /// 303 /// (The target feature safety obligation is automatically fulfilled by 304 /// virtue of being a method on `One`, which can only be constructed 305 /// when it is safe to call `simd128` routines.) 306 #[target_feature(enable = "simd128")] 307 #[inline] count_raw_impl( &self, start: *const u8, end: *const u8, ) -> usize308 unsafe fn count_raw_impl( 309 &self, 310 start: *const u8, 311 end: *const u8, 312 ) -> usize { 313 self.0.count_raw(start, end) 314 } 315 316 /// Returns an iterator over all occurrences of the needle byte in the 317 /// given haystack. 318 /// 319 /// The iterator returned implements `DoubleEndedIterator`. This means it 320 /// can also be used to find occurrences in reverse order. 321 #[inline] iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h>322 pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> { 323 OneIter { searcher: self, it: generic::Iter::new(haystack) } 324 } 325 } 326 327 /// An iterator over all occurrences of a single byte in a haystack. 328 /// 329 /// This iterator implements `DoubleEndedIterator`, which means it can also be 330 /// used to find occurrences in reverse order. 331 /// 332 /// This iterator is created by the [`One::iter`] method. 333 /// 334 /// The lifetime parameters are as follows: 335 /// 336 /// * `'a` refers to the lifetime of the underlying [`One`] searcher. 337 /// * `'h` refers to the lifetime of the haystack being searched. 338 #[derive(Clone, Debug)] 339 pub struct OneIter<'a, 'h> { 340 searcher: &'a One, 341 it: generic::Iter<'h>, 342 } 343 344 impl<'a, 'h> Iterator for OneIter<'a, 'h> { 345 type Item = usize; 346 347 #[inline] next(&mut self) -> Option<usize>348 fn next(&mut self) -> Option<usize> { 349 // SAFETY: We rely on the generic iterator to provide valid start 350 // and end pointers, but we guarantee that any pointer returned by 351 // 'find_raw' falls within the bounds of the start and end pointer. 352 unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } 353 } 354 355 #[inline] count(self) -> usize356 fn count(self) -> usize { 357 self.it.count(|s, e| { 358 // SAFETY: We rely on our generic iterator to return valid start 359 // and end pointers. 360 unsafe { self.searcher.count_raw(s, e) } 361 }) 362 } 363 364 #[inline] size_hint(&self) -> (usize, Option<usize>)365 fn size_hint(&self) -> (usize, Option<usize>) { 366 self.it.size_hint() 367 } 368 } 369 370 impl<'a, 'h> DoubleEndedIterator for OneIter<'a, 'h> { 371 #[inline] next_back(&mut self) -> Option<usize>372 fn next_back(&mut self) -> Option<usize> { 373 // SAFETY: We rely on the generic iterator to provide valid start 374 // and end pointers, but we guarantee that any pointer returned by 375 // 'rfind_raw' falls within the bounds of the start and end pointer. 376 unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } 377 } 378 } 379 380 impl<'a, 'h> core::iter::FusedIterator for OneIter<'a, 'h> {} 381 382 /// Finds all occurrences of two bytes in a haystack. 383 /// 384 /// That is, this reports matches of one of two possible bytes. For example, 385 /// searching for `a` or `b` in `afoobar` would report matches at offsets `0`, 386 /// `4` and `5`. 387 #[derive(Clone, Copy, Debug)] 388 pub struct Two(generic::Two<v128>); 389 390 impl Two { 391 /// Create a new searcher that finds occurrences of the needle bytes given. 392 /// 393 /// This particular searcher is specialized to use simd128 vector 394 /// instructions that typically make it quite fast. 395 /// 396 /// If simd128 is unavailable in the current environment, then `None` is 397 /// returned. 398 #[inline] new(needle1: u8, needle2: u8) -> Option<Two>399 pub fn new(needle1: u8, needle2: u8) -> Option<Two> { 400 if Two::is_available() { 401 // SAFETY: we check that simd128 is available above. 402 unsafe { Some(Two::new_unchecked(needle1, needle2)) } 403 } else { 404 None 405 } 406 } 407 408 /// Create a new finder specific to simd128 vectors and routines without 409 /// checking that simd128 is available. 410 /// 411 /// # Safety 412 /// 413 /// Callers must guarantee that it is safe to execute `simd128` 414 /// instructions in the current environment. 415 #[target_feature(enable = "simd128")] 416 #[inline] new_unchecked(needle1: u8, needle2: u8) -> Two417 pub unsafe fn new_unchecked(needle1: u8, needle2: u8) -> Two { 418 Two(generic::Two::new(needle1, needle2)) 419 } 420 421 /// Returns true when this implementation is available in the current 422 /// environment. 423 /// 424 /// When this is true, it is guaranteed that [`Two::new`] will return 425 /// a `Some` value. Similarly, when it is false, it is guaranteed that 426 /// `Two::new` will return a `None` value. 427 /// 428 /// Note also that for the lifetime of a single program, if this returns 429 /// true then it will always return true. 430 #[inline] is_available() -> bool431 pub fn is_available() -> bool { 432 #[cfg(target_feature = "simd128")] 433 { 434 true 435 } 436 #[cfg(not(target_feature = "simd128"))] 437 { 438 false 439 } 440 } 441 442 /// Return the first occurrence of one of the needle bytes in the given 443 /// haystack. If no such occurrence exists, then `None` is returned. 444 /// 445 /// The occurrence is reported as an offset into `haystack`. Its maximum 446 /// value is `haystack.len() - 1`. 447 #[inline] find(&self, haystack: &[u8]) -> Option<usize>448 pub fn find(&self, haystack: &[u8]) -> Option<usize> { 449 // SAFETY: `find_raw` guarantees that if a pointer is returned, it 450 // falls within the bounds of the start and end pointers. 451 unsafe { 452 generic::search_slice_with_raw(haystack, |s, e| { 453 self.find_raw(s, e) 454 }) 455 } 456 } 457 458 /// Return the last occurrence of one of the needle bytes in the given 459 /// haystack. If no such occurrence exists, then `None` is returned. 460 /// 461 /// The occurrence is reported as an offset into `haystack`. Its maximum 462 /// value is `haystack.len() - 1`. 463 #[inline] rfind(&self, haystack: &[u8]) -> Option<usize>464 pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { 465 // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it 466 // falls within the bounds of the start and end pointers. 467 unsafe { 468 generic::search_slice_with_raw(haystack, |s, e| { 469 self.rfind_raw(s, e) 470 }) 471 } 472 } 473 474 /// Like `find`, but accepts and returns raw pointers. 475 /// 476 /// When a match is found, the pointer returned is guaranteed to be 477 /// `>= start` and `< end`. 478 /// 479 /// This routine is useful if you're already using raw pointers and would 480 /// like to avoid converting back to a slice before executing a search. 481 /// 482 /// # Safety 483 /// 484 /// * Both `start` and `end` must be valid for reads. 485 /// * Both `start` and `end` must point to an initialized value. 486 /// * Both `start` and `end` must point to the same allocated object and 487 /// must either be in bounds or at most one byte past the end of the 488 /// allocated object. 489 /// * Both `start` and `end` must be _derived from_ a pointer to the same 490 /// object. 491 /// * The distance between `start` and `end` must not overflow `isize`. 492 /// * The distance being in bounds must not rely on "wrapping around" the 493 /// address space. 494 /// 495 /// Note that callers may pass a pair of pointers such that `start >= end`. 496 /// In that case, `None` will always be returned. 497 #[inline] find_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>498 pub unsafe fn find_raw( 499 &self, 500 start: *const u8, 501 end: *const u8, 502 ) -> Option<*const u8> { 503 if start >= end { 504 return None; 505 } 506 if end.distance(start) < v128::BYTES { 507 // SAFETY: We require the caller to pass valid start/end pointers. 508 return generic::fwd_byte_by_byte(start, end, |b| { 509 b == self.0.needle1() || b == self.0.needle2() 510 }); 511 } 512 // SAFETY: Building a `Two` means it's safe to call 'simd128' routines. 513 // Also, we've checked that our haystack is big enough to run on the 514 // vector routine. Pointer validity is caller's responsibility. 515 self.find_raw_impl(start, end) 516 } 517 518 /// Like `rfind`, but accepts and returns raw pointers. 519 /// 520 /// When a match is found, the pointer returned is guaranteed to be 521 /// `>= start` and `< end`. 522 /// 523 /// This routine is useful if you're already using raw pointers and would 524 /// like to avoid converting back to a slice before executing a search. 525 /// 526 /// # Safety 527 /// 528 /// * Both `start` and `end` must be valid for reads. 529 /// * Both `start` and `end` must point to an initialized value. 530 /// * Both `start` and `end` must point to the same allocated object and 531 /// must either be in bounds or at most one byte past the end of the 532 /// allocated object. 533 /// * Both `start` and `end` must be _derived from_ a pointer to the same 534 /// object. 535 /// * The distance between `start` and `end` must not overflow `isize`. 536 /// * The distance being in bounds must not rely on "wrapping around" the 537 /// address space. 538 /// 539 /// Note that callers may pass a pair of pointers such that `start >= end`. 540 /// In that case, `None` will always be returned. 541 #[inline] rfind_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>542 pub unsafe fn rfind_raw( 543 &self, 544 start: *const u8, 545 end: *const u8, 546 ) -> Option<*const u8> { 547 if start >= end { 548 return None; 549 } 550 if end.distance(start) < v128::BYTES { 551 // SAFETY: We require the caller to pass valid start/end pointers. 552 return generic::rev_byte_by_byte(start, end, |b| { 553 b == self.0.needle1() || b == self.0.needle2() 554 }); 555 } 556 // SAFETY: Building a `Two` means it's safe to call 'simd128' routines. 557 // Also, we've checked that our haystack is big enough to run on the 558 // vector routine. Pointer validity is caller's responsibility. 559 self.rfind_raw_impl(start, end) 560 } 561 562 /// Execute a search using simd128 vectors and routines. 563 /// 564 /// # Safety 565 /// 566 /// Same as [`Two::find_raw`], except the distance between `start` and 567 /// `end` must be at least the size of a simd128 vector (in bytes). 568 /// 569 /// (The target feature safety obligation is automatically fulfilled by 570 /// virtue of being a method on `Two`, which can only be constructed 571 /// when it is safe to call `simd128` routines.) 572 #[target_feature(enable = "simd128")] 573 #[inline] find_raw_impl( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>574 unsafe fn find_raw_impl( 575 &self, 576 start: *const u8, 577 end: *const u8, 578 ) -> Option<*const u8> { 579 self.0.find_raw(start, end) 580 } 581 582 /// Execute a search using simd128 vectors and routines. 583 /// 584 /// # Safety 585 /// 586 /// Same as [`Two::rfind_raw`], except the distance between `start` and 587 /// `end` must be at least the size of a simd128 vector (in bytes). 588 /// 589 /// (The target feature safety obligation is automatically fulfilled by 590 /// virtue of being a method on `Two`, which can only be constructed 591 /// when it is safe to call `simd128` routines.) 592 #[target_feature(enable = "simd128")] 593 #[inline] rfind_raw_impl( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>594 unsafe fn rfind_raw_impl( 595 &self, 596 start: *const u8, 597 end: *const u8, 598 ) -> Option<*const u8> { 599 self.0.rfind_raw(start, end) 600 } 601 602 /// Returns an iterator over all occurrences of the needle bytes in the 603 /// given haystack. 604 /// 605 /// The iterator returned implements `DoubleEndedIterator`. This means it 606 /// can also be used to find occurrences in reverse order. 607 #[inline] iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h>608 pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> { 609 TwoIter { searcher: self, it: generic::Iter::new(haystack) } 610 } 611 } 612 613 /// An iterator over all occurrences of two possible bytes in a haystack. 614 /// 615 /// This iterator implements `DoubleEndedIterator`, which means it can also be 616 /// used to find occurrences in reverse order. 617 /// 618 /// This iterator is created by the [`Two::iter`] method. 619 /// 620 /// The lifetime parameters are as follows: 621 /// 622 /// * `'a` refers to the lifetime of the underlying [`Two`] searcher. 623 /// * `'h` refers to the lifetime of the haystack being searched. 624 #[derive(Clone, Debug)] 625 pub struct TwoIter<'a, 'h> { 626 searcher: &'a Two, 627 it: generic::Iter<'h>, 628 } 629 630 impl<'a, 'h> Iterator for TwoIter<'a, 'h> { 631 type Item = usize; 632 633 #[inline] next(&mut self) -> Option<usize>634 fn next(&mut self) -> Option<usize> { 635 // SAFETY: We rely on the generic iterator to provide valid start 636 // and end pointers, but we guarantee that any pointer returned by 637 // 'find_raw' falls within the bounds of the start and end pointer. 638 unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } 639 } 640 641 #[inline] size_hint(&self) -> (usize, Option<usize>)642 fn size_hint(&self) -> (usize, Option<usize>) { 643 self.it.size_hint() 644 } 645 } 646 647 impl<'a, 'h> DoubleEndedIterator for TwoIter<'a, 'h> { 648 #[inline] next_back(&mut self) -> Option<usize>649 fn next_back(&mut self) -> Option<usize> { 650 // SAFETY: We rely on the generic iterator to provide valid start 651 // and end pointers, but we guarantee that any pointer returned by 652 // 'rfind_raw' falls within the bounds of the start and end pointer. 653 unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } 654 } 655 } 656 657 impl<'a, 'h> core::iter::FusedIterator for TwoIter<'a, 'h> {} 658 659 /// Finds all occurrences of three bytes in a haystack. 660 /// 661 /// That is, this reports matches of one of three possible bytes. For example, 662 /// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets 663 /// `0`, `2`, `3`, `4` and `5`. 664 #[derive(Clone, Copy, Debug)] 665 pub struct Three(generic::Three<v128>); 666 667 impl Three { 668 /// Create a new searcher that finds occurrences of the needle bytes given. 669 /// 670 /// This particular searcher is specialized to use simd128 vector 671 /// instructions that typically make it quite fast. 672 /// 673 /// If simd128 is unavailable in the current environment, then `None` is 674 /// returned. 675 #[inline] new(needle1: u8, needle2: u8, needle3: u8) -> Option<Three>676 pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Option<Three> { 677 if Three::is_available() { 678 // SAFETY: we check that simd128 is available above. 679 unsafe { Some(Three::new_unchecked(needle1, needle2, needle3)) } 680 } else { 681 None 682 } 683 } 684 685 /// Create a new finder specific to simd128 vectors and routines without 686 /// checking that simd128 is available. 687 /// 688 /// # Safety 689 /// 690 /// Callers must guarantee that it is safe to execute `simd128` 691 /// instructions in the current environment. 692 #[target_feature(enable = "simd128")] 693 #[inline] new_unchecked( needle1: u8, needle2: u8, needle3: u8, ) -> Three694 pub unsafe fn new_unchecked( 695 needle1: u8, 696 needle2: u8, 697 needle3: u8, 698 ) -> Three { 699 Three(generic::Three::new(needle1, needle2, needle3)) 700 } 701 702 /// Returns true when this implementation is available in the current 703 /// environment. 704 /// 705 /// When this is true, it is guaranteed that [`Three::new`] will return 706 /// a `Some` value. Similarly, when it is false, it is guaranteed that 707 /// `Three::new` will return a `None` value. 708 /// 709 /// Note also that for the lifetime of a single program, if this returns 710 /// true then it will always return true. 711 #[inline] is_available() -> bool712 pub fn is_available() -> bool { 713 #[cfg(target_feature = "simd128")] 714 { 715 true 716 } 717 #[cfg(not(target_feature = "simd128"))] 718 { 719 false 720 } 721 } 722 723 /// Return the first occurrence of one of the needle bytes in the given 724 /// haystack. If no such occurrence exists, then `None` is returned. 725 /// 726 /// The occurrence is reported as an offset into `haystack`. Its maximum 727 /// value is `haystack.len() - 1`. 728 #[inline] find(&self, haystack: &[u8]) -> Option<usize>729 pub fn find(&self, haystack: &[u8]) -> Option<usize> { 730 // SAFETY: `find_raw` guarantees that if a pointer is returned, it 731 // falls within the bounds of the start and end pointers. 732 unsafe { 733 generic::search_slice_with_raw(haystack, |s, e| { 734 self.find_raw(s, e) 735 }) 736 } 737 } 738 739 /// Return the last occurrence of one of the needle bytes in the given 740 /// haystack. If no such occurrence exists, then `None` is returned. 741 /// 742 /// The occurrence is reported as an offset into `haystack`. Its maximum 743 /// value is `haystack.len() - 1`. 744 #[inline] rfind(&self, haystack: &[u8]) -> Option<usize>745 pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { 746 // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it 747 // falls within the bounds of the start and end pointers. 748 unsafe { 749 generic::search_slice_with_raw(haystack, |s, e| { 750 self.rfind_raw(s, e) 751 }) 752 } 753 } 754 755 /// Like `find`, but accepts and returns raw pointers. 756 /// 757 /// When a match is found, the pointer returned is guaranteed to be 758 /// `>= start` and `< end`. 759 /// 760 /// This routine is useful if you're already using raw pointers and would 761 /// like to avoid converting back to a slice before executing a search. 762 /// 763 /// # Safety 764 /// 765 /// * Both `start` and `end` must be valid for reads. 766 /// * Both `start` and `end` must point to an initialized value. 767 /// * Both `start` and `end` must point to the same allocated object and 768 /// must either be in bounds or at most one byte past the end of the 769 /// allocated object. 770 /// * Both `start` and `end` must be _derived from_ a pointer to the same 771 /// object. 772 /// * The distance between `start` and `end` must not overflow `isize`. 773 /// * The distance being in bounds must not rely on "wrapping around" the 774 /// address space. 775 /// 776 /// Note that callers may pass a pair of pointers such that `start >= end`. 777 /// In that case, `None` will always be returned. 778 #[inline] find_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>779 pub unsafe fn find_raw( 780 &self, 781 start: *const u8, 782 end: *const u8, 783 ) -> Option<*const u8> { 784 if start >= end { 785 return None; 786 } 787 if end.distance(start) < v128::BYTES { 788 // SAFETY: We require the caller to pass valid start/end pointers. 789 return generic::fwd_byte_by_byte(start, end, |b| { 790 b == self.0.needle1() 791 || b == self.0.needle2() 792 || b == self.0.needle3() 793 }); 794 } 795 // SAFETY: Building a `Three` means it's safe to call 'simd128' 796 // routines. Also, we've checked that our haystack is big enough to run 797 // on the vector routine. Pointer validity is caller's responsibility. 798 self.find_raw_impl(start, end) 799 } 800 801 /// Like `rfind`, but accepts and returns raw pointers. 802 /// 803 /// When a match is found, the pointer returned is guaranteed to be 804 /// `>= start` and `< end`. 805 /// 806 /// This routine is useful if you're already using raw pointers and would 807 /// like to avoid converting back to a slice before executing a search. 808 /// 809 /// # Safety 810 /// 811 /// * Both `start` and `end` must be valid for reads. 812 /// * Both `start` and `end` must point to an initialized value. 813 /// * Both `start` and `end` must point to the same allocated object and 814 /// must either be in bounds or at most one byte past the end of the 815 /// allocated object. 816 /// * Both `start` and `end` must be _derived from_ a pointer to the same 817 /// object. 818 /// * The distance between `start` and `end` must not overflow `isize`. 819 /// * The distance being in bounds must not rely on "wrapping around" the 820 /// address space. 821 /// 822 /// Note that callers may pass a pair of pointers such that `start >= end`. 823 /// In that case, `None` will always be returned. 824 #[inline] rfind_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>825 pub unsafe fn rfind_raw( 826 &self, 827 start: *const u8, 828 end: *const u8, 829 ) -> Option<*const u8> { 830 if start >= end { 831 return None; 832 } 833 if end.distance(start) < v128::BYTES { 834 // SAFETY: We require the caller to pass valid start/end pointers. 835 return generic::rev_byte_by_byte(start, end, |b| { 836 b == self.0.needle1() 837 || b == self.0.needle2() 838 || b == self.0.needle3() 839 }); 840 } 841 // SAFETY: Building a `Three` means it's safe to call 'simd128' 842 // routines. Also, we've checked that our haystack is big enough to run 843 // on the vector routine. Pointer validity is caller's responsibility. 844 self.rfind_raw_impl(start, end) 845 } 846 847 /// Execute a search using simd128 vectors and routines. 848 /// 849 /// # Safety 850 /// 851 /// Same as [`Three::find_raw`], except the distance between `start` and 852 /// `end` must be at least the size of a simd128 vector (in bytes). 853 /// 854 /// (The target feature safety obligation is automatically fulfilled by 855 /// virtue of being a method on `Three`, which can only be constructed 856 /// when it is safe to call `simd128` routines.) 857 #[target_feature(enable = "simd128")] 858 #[inline] find_raw_impl( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>859 unsafe fn find_raw_impl( 860 &self, 861 start: *const u8, 862 end: *const u8, 863 ) -> Option<*const u8> { 864 self.0.find_raw(start, end) 865 } 866 867 /// Execute a search using simd128 vectors and routines. 868 /// 869 /// # Safety 870 /// 871 /// Same as [`Three::rfind_raw`], except the distance between `start` and 872 /// `end` must be at least the size of a simd128 vector (in bytes). 873 /// 874 /// (The target feature safety obligation is automatically fulfilled by 875 /// virtue of being a method on `Three`, which can only be constructed 876 /// when it is safe to call `simd128` routines.) 877 #[target_feature(enable = "simd128")] 878 #[inline] rfind_raw_impl( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>879 unsafe fn rfind_raw_impl( 880 &self, 881 start: *const u8, 882 end: *const u8, 883 ) -> Option<*const u8> { 884 self.0.rfind_raw(start, end) 885 } 886 887 /// Returns an iterator over all occurrences of the needle byte in the 888 /// given haystack. 889 /// 890 /// The iterator returned implements `DoubleEndedIterator`. This means it 891 /// can also be used to find occurrences in reverse order. 892 #[inline] iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h>893 pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> { 894 ThreeIter { searcher: self, it: generic::Iter::new(haystack) } 895 } 896 } 897 898 /// An iterator over all occurrences of three possible bytes in a haystack. 899 /// 900 /// This iterator implements `DoubleEndedIterator`, which means it can also be 901 /// used to find occurrences in reverse order. 902 /// 903 /// This iterator is created by the [`Three::iter`] method. 904 /// 905 /// The lifetime parameters are as follows: 906 /// 907 /// * `'a` refers to the lifetime of the underlying [`Three`] searcher. 908 /// * `'h` refers to the lifetime of the haystack being searched. 909 #[derive(Clone, Debug)] 910 pub struct ThreeIter<'a, 'h> { 911 searcher: &'a Three, 912 it: generic::Iter<'h>, 913 } 914 915 impl<'a, 'h> Iterator for ThreeIter<'a, 'h> { 916 type Item = usize; 917 918 #[inline] next(&mut self) -> Option<usize>919 fn next(&mut self) -> Option<usize> { 920 // SAFETY: We rely on the generic iterator to provide valid start 921 // and end pointers, but we guarantee that any pointer returned by 922 // 'find_raw' falls within the bounds of the start and end pointer. 923 unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } 924 } 925 926 #[inline] size_hint(&self) -> (usize, Option<usize>)927 fn size_hint(&self) -> (usize, Option<usize>) { 928 self.it.size_hint() 929 } 930 } 931 932 impl<'a, 'h> DoubleEndedIterator for ThreeIter<'a, 'h> { 933 #[inline] next_back(&mut self) -> Option<usize>934 fn next_back(&mut self) -> Option<usize> { 935 // SAFETY: We rely on the generic iterator to provide valid start 936 // and end pointers, but we guarantee that any pointer returned by 937 // 'rfind_raw' falls within the bounds of the start and end pointer. 938 unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } 939 } 940 } 941 942 impl<'a, 'h> core::iter::FusedIterator for ThreeIter<'a, 'h> {} 943 944 #[cfg(test)] 945 mod tests { 946 use super::*; 947 948 define_memchr_quickcheck!(super); 949 950 #[test] forward_one()951 fn forward_one() { 952 crate::tests::memchr::Runner::new(1).forward_iter( 953 |haystack, needles| { 954 Some(One::new(needles[0])?.iter(haystack).collect()) 955 }, 956 ) 957 } 958 959 #[test] reverse_one()960 fn reverse_one() { 961 crate::tests::memchr::Runner::new(1).reverse_iter( 962 |haystack, needles| { 963 Some(One::new(needles[0])?.iter(haystack).rev().collect()) 964 }, 965 ) 966 } 967 968 #[test] count_one()969 fn count_one() { 970 crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| { 971 Some(One::new(needles[0])?.iter(haystack).count()) 972 }) 973 } 974 975 #[test] forward_two()976 fn forward_two() { 977 crate::tests::memchr::Runner::new(2).forward_iter( 978 |haystack, needles| { 979 let n1 = needles.get(0).copied()?; 980 let n2 = needles.get(1).copied()?; 981 Some(Two::new(n1, n2)?.iter(haystack).collect()) 982 }, 983 ) 984 } 985 986 #[test] reverse_two()987 fn reverse_two() { 988 crate::tests::memchr::Runner::new(2).reverse_iter( 989 |haystack, needles| { 990 let n1 = needles.get(0).copied()?; 991 let n2 = needles.get(1).copied()?; 992 Some(Two::new(n1, n2)?.iter(haystack).rev().collect()) 993 }, 994 ) 995 } 996 997 #[test] forward_three()998 fn forward_three() { 999 crate::tests::memchr::Runner::new(3).forward_iter( 1000 |haystack, needles| { 1001 let n1 = needles.get(0).copied()?; 1002 let n2 = needles.get(1).copied()?; 1003 let n3 = needles.get(2).copied()?; 1004 Some(Three::new(n1, n2, n3)?.iter(haystack).collect()) 1005 }, 1006 ) 1007 } 1008 1009 #[test] reverse_three()1010 fn reverse_three() { 1011 crate::tests::memchr::Runner::new(3).reverse_iter( 1012 |haystack, needles| { 1013 let n1 = needles.get(0).copied()?; 1014 let n2 = needles.get(1).copied()?; 1015 let n3 = needles.get(2).copied()?; 1016 Some(Three::new(n1, n2, n3)?.iter(haystack).rev().collect()) 1017 }, 1018 ) 1019 } 1020 } 1021