1 /*! 2 This module defines 128-bit vector implementations of `memchr` and friends. 3 4 The main types in this module are [`One`], [`Two`] and [`Three`]. They are for 5 searching for one, two or three distinct bytes, respectively, in a haystack. 6 Each type also has corresponding double ended iterators. These searchers are 7 typically much faster than scalar routines accomplishing the same task. 8 9 The `One` searcher also provides a [`One::count`] routine for efficiently 10 counting the number of times a single byte occurs in a haystack. This is 11 useful, for example, for counting the number of lines in a haystack. This 12 routine exists because it is usually faster, especially with a high match 13 count, then using [`One::find`] repeatedly. ([`OneIter`] specializes its 14 `Iterator::count` implementation to use this routine.) 15 16 Only one, two and three bytes are supported because three bytes is about 17 the point where one sees diminishing returns. Beyond this point and it's 18 probably (but not necessarily) better to just use a simple `[bool; 256]` array 19 or similar. However, it depends mightily on the specific work-load and the 20 expected match frequency. 21 */ 22 23 use core::arch::x86_64::__m128i; 24 25 use crate::{arch::generic::memchr as generic, ext::Pointer, vector::Vector}; 26 27 /// Finds all occurrences of a single byte in a haystack. 28 #[derive(Clone, Copy, Debug)] 29 pub struct One(generic::One<__m128i>); 30 31 impl One { 32 /// Create a new searcher that finds occurrences of the needle byte given. 33 /// 34 /// This particular searcher is specialized to use SSE2 vector instructions 35 /// that typically make it quite fast. 36 /// 37 /// If SSE2 is unavailable in the current environment, then `None` is 38 /// returned. 39 #[inline] new(needle: u8) -> Option<One>40 pub fn new(needle: u8) -> Option<One> { 41 if One::is_available() { 42 // SAFETY: we check that sse2 is available above. 43 unsafe { Some(One::new_unchecked(needle)) } 44 } else { 45 None 46 } 47 } 48 49 /// Create a new finder specific to SSE2 vectors and routines without 50 /// checking that SSE2 is available. 51 /// 52 /// # Safety 53 /// 54 /// Callers must guarantee that it is safe to execute `sse2` instructions 55 /// in the current environment. 56 /// 57 /// Note that it is a common misconception that if one compiles for an 58 /// `x86_64` target, then they therefore automatically have access to SSE2 59 /// instructions. While this is almost always the case, it isn't true in 60 /// 100% of cases. 61 #[target_feature(enable = "sse2")] 62 #[inline] new_unchecked(needle: u8) -> One63 pub unsafe fn new_unchecked(needle: u8) -> One { 64 One(generic::One::new(needle)) 65 } 66 67 /// Returns true when this implementation is available in the current 68 /// environment. 69 /// 70 /// When this is true, it is guaranteed that [`One::new`] will return 71 /// a `Some` value. Similarly, when it is false, it is guaranteed that 72 /// `One::new` will return a `None` value. 73 /// 74 /// Note also that for the lifetime of a single program, if this returns 75 /// true then it will always return true. 76 #[inline] is_available() -> bool77 pub fn is_available() -> bool { 78 #[cfg(target_feature = "sse2")] 79 { 80 true 81 } 82 #[cfg(not(target_feature = "sse2"))] 83 { 84 false 85 } 86 } 87 88 /// Return the first occurrence of one of the needle bytes in the given 89 /// haystack. If no such occurrence exists, then `None` is returned. 90 /// 91 /// The occurrence is reported as an offset into `haystack`. Its maximum 92 /// value is `haystack.len() - 1`. 93 #[inline] find(&self, haystack: &[u8]) -> Option<usize>94 pub fn find(&self, haystack: &[u8]) -> Option<usize> { 95 // SAFETY: `find_raw` guarantees that if a pointer is returned, it 96 // falls within the bounds of the start and end pointers. 97 unsafe { 98 generic::search_slice_with_raw(haystack, |s, e| { 99 self.find_raw(s, e) 100 }) 101 } 102 } 103 104 /// Return the last occurrence of one of the needle bytes in the given 105 /// haystack. If no such occurrence exists, then `None` is returned. 106 /// 107 /// The occurrence is reported as an offset into `haystack`. Its maximum 108 /// value is `haystack.len() - 1`. 109 #[inline] rfind(&self, haystack: &[u8]) -> Option<usize>110 pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { 111 // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it 112 // falls within the bounds of the start and end pointers. 113 unsafe { 114 generic::search_slice_with_raw(haystack, |s, e| { 115 self.rfind_raw(s, e) 116 }) 117 } 118 } 119 120 /// Counts all occurrences of this byte in the given haystack. 121 #[inline] count(&self, haystack: &[u8]) -> usize122 pub fn count(&self, haystack: &[u8]) -> usize { 123 // SAFETY: All of our pointers are derived directly from a borrowed 124 // slice, which is guaranteed to be valid. 125 unsafe { 126 let start = haystack.as_ptr(); 127 let end = start.add(haystack.len()); 128 self.count_raw(start, end) 129 } 130 } 131 132 /// Like `find`, but accepts and returns raw pointers. 133 /// 134 /// When a match is found, the pointer returned is guaranteed to be 135 /// `>= start` and `< end`. 136 /// 137 /// This routine is useful if you're already using raw pointers and would 138 /// like to avoid converting back to a slice before executing a search. 139 /// 140 /// # Safety 141 /// 142 /// * Both `start` and `end` must be valid for reads. 143 /// * Both `start` and `end` must point to an initialized value. 144 /// * Both `start` and `end` must point to the same allocated object and 145 /// must either be in bounds or at most one byte past the end of the 146 /// allocated object. 147 /// * Both `start` and `end` must be _derived from_ a pointer to the same 148 /// object. 149 /// * The distance between `start` and `end` must not overflow `isize`. 150 /// * The distance being in bounds must not rely on "wrapping around" the 151 /// address space. 152 /// 153 /// Note that callers may pass a pair of pointers such that `start >= end`. 154 /// In that case, `None` will always be returned. 155 #[inline] find_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>156 pub unsafe fn find_raw( 157 &self, 158 start: *const u8, 159 end: *const u8, 160 ) -> Option<*const u8> { 161 if start >= end { 162 return None; 163 } 164 if end.distance(start) < __m128i::BYTES { 165 // SAFETY: We require the caller to pass valid start/end pointers. 166 return generic::fwd_byte_by_byte(start, end, |b| { 167 b == self.0.needle1() 168 }); 169 } 170 // SAFETY: Building a `One` means it's safe to call 'sse2' routines. 171 // Also, we've checked that our haystack is big enough to run on the 172 // vector routine. Pointer validity is caller's responsibility. 173 // 174 // Note that we could call `self.0.find_raw` directly here. But that 175 // means we'd have to annotate this routine with `target_feature`. 176 // Which is fine, because this routine is `unsafe` anyway and the 177 // `target_feature` obligation is met by virtue of building a `One`. 178 // The real problem is that a routine with a `target_feature` 179 // annotation generally can't be inlined into caller code unless the 180 // caller code has the same target feature annotations. Which is maybe 181 // okay for SSE2, but we do the same thing for AVX2 where caller code 182 // probably usually doesn't have AVX2 enabled. That means that this 183 // routine can be inlined which will handle some of the short-haystack 184 // cases above without touching the architecture specific code. 185 self.find_raw_impl(start, end) 186 } 187 188 /// Like `rfind`, but accepts and returns raw pointers. 189 /// 190 /// When a match is found, the pointer returned is guaranteed to be 191 /// `>= start` and `< end`. 192 /// 193 /// This routine is useful if you're already using raw pointers and would 194 /// like to avoid converting back to a slice before executing a search. 195 /// 196 /// # Safety 197 /// 198 /// * Both `start` and `end` must be valid for reads. 199 /// * Both `start` and `end` must point to an initialized value. 200 /// * Both `start` and `end` must point to the same allocated object and 201 /// must either be in bounds or at most one byte past the end of the 202 /// allocated object. 203 /// * Both `start` and `end` must be _derived from_ a pointer to the same 204 /// object. 205 /// * The distance between `start` and `end` must not overflow `isize`. 206 /// * The distance being in bounds must not rely on "wrapping around" the 207 /// address space. 208 /// 209 /// Note that callers may pass a pair of pointers such that `start >= end`. 210 /// In that case, `None` will always be returned. 211 #[inline] rfind_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>212 pub unsafe fn rfind_raw( 213 &self, 214 start: *const u8, 215 end: *const u8, 216 ) -> Option<*const u8> { 217 if start >= end { 218 return None; 219 } 220 if end.distance(start) < __m128i::BYTES { 221 // SAFETY: We require the caller to pass valid start/end pointers. 222 return generic::rev_byte_by_byte(start, end, |b| { 223 b == self.0.needle1() 224 }); 225 } 226 // SAFETY: Building a `One` means it's safe to call 'sse2' routines. 227 // Also, we've checked that our haystack is big enough to run on the 228 // vector routine. Pointer validity is caller's responsibility. 229 // 230 // See note in forward routine above for why we don't just call 231 // `self.0.rfind_raw` directly here. 232 self.rfind_raw_impl(start, end) 233 } 234 235 /// Counts all occurrences of this byte in the given haystack represented 236 /// by raw pointers. 237 /// 238 /// This routine is useful if you're already using raw pointers and would 239 /// like to avoid converting back to a slice before executing a search. 240 /// 241 /// # Safety 242 /// 243 /// * Both `start` and `end` must be valid for reads. 244 /// * Both `start` and `end` must point to an initialized value. 245 /// * Both `start` and `end` must point to the same allocated object and 246 /// must either be in bounds or at most one byte past the end of the 247 /// allocated object. 248 /// * Both `start` and `end` must be _derived from_ a pointer to the same 249 /// object. 250 /// * The distance between `start` and `end` must not overflow `isize`. 251 /// * The distance being in bounds must not rely on "wrapping around" the 252 /// address space. 253 /// 254 /// Note that callers may pass a pair of pointers such that `start >= end`. 255 /// In that case, `0` will always be returned. 256 #[inline] count_raw(&self, start: *const u8, end: *const u8) -> usize257 pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize { 258 if start >= end { 259 return 0; 260 } 261 if end.distance(start) < __m128i::BYTES { 262 // SAFETY: We require the caller to pass valid start/end pointers. 263 return generic::count_byte_by_byte(start, end, |b| { 264 b == self.0.needle1() 265 }); 266 } 267 // SAFETY: Building a `One` means it's safe to call 'sse2' routines. 268 // Also, we've checked that our haystack is big enough to run on the 269 // vector routine. Pointer validity is caller's responsibility. 270 self.count_raw_impl(start, end) 271 } 272 273 /// Execute a search using SSE2 vectors and routines. 274 /// 275 /// # Safety 276 /// 277 /// Same as [`One::find_raw`], except the distance between `start` and 278 /// `end` must be at least the size of an SSE2 vector (in bytes). 279 /// 280 /// (The target feature safety obligation is automatically fulfilled by 281 /// virtue of being a method on `One`, which can only be constructed 282 /// when it is safe to call `sse2` routines.) 283 #[target_feature(enable = "sse2")] 284 #[inline] find_raw_impl( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>285 unsafe fn find_raw_impl( 286 &self, 287 start: *const u8, 288 end: *const u8, 289 ) -> Option<*const u8> { 290 self.0.find_raw(start, end) 291 } 292 293 /// Execute a search using SSE2 vectors and routines. 294 /// 295 /// # Safety 296 /// 297 /// Same as [`One::rfind_raw`], except the distance between `start` and 298 /// `end` must be at least the size of an SSE2 vector (in bytes). 299 /// 300 /// (The target feature safety obligation is automatically fulfilled by 301 /// virtue of being a method on `One`, which can only be constructed 302 /// when it is safe to call `sse2` routines.) 303 #[target_feature(enable = "sse2")] 304 #[inline] rfind_raw_impl( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>305 unsafe fn rfind_raw_impl( 306 &self, 307 start: *const u8, 308 end: *const u8, 309 ) -> Option<*const u8> { 310 self.0.rfind_raw(start, end) 311 } 312 313 /// Execute a count using SSE2 vectors and routines. 314 /// 315 /// # Safety 316 /// 317 /// Same as [`One::count_raw`], except the distance between `start` and 318 /// `end` must be at least the size of an SSE2 vector (in bytes). 319 /// 320 /// (The target feature safety obligation is automatically fulfilled by 321 /// virtue of being a method on `One`, which can only be constructed 322 /// when it is safe to call `sse2` routines.) 323 #[target_feature(enable = "sse2")] 324 #[inline] count_raw_impl( &self, start: *const u8, end: *const u8, ) -> usize325 unsafe fn count_raw_impl( 326 &self, 327 start: *const u8, 328 end: *const u8, 329 ) -> usize { 330 self.0.count_raw(start, end) 331 } 332 333 /// Returns an iterator over all occurrences of the needle byte in the 334 /// given haystack. 335 /// 336 /// The iterator returned implements `DoubleEndedIterator`. This means it 337 /// can also be used to find occurrences in reverse order. 338 #[inline] iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h>339 pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> { 340 OneIter { searcher: self, it: generic::Iter::new(haystack) } 341 } 342 } 343 344 /// An iterator over all occurrences of a single byte in a haystack. 345 /// 346 /// This iterator implements `DoubleEndedIterator`, which means it can also be 347 /// used to find occurrences in reverse order. 348 /// 349 /// This iterator is created by the [`One::iter`] method. 350 /// 351 /// The lifetime parameters are as follows: 352 /// 353 /// * `'a` refers to the lifetime of the underlying [`One`] searcher. 354 /// * `'h` refers to the lifetime of the haystack being searched. 355 #[derive(Clone, Debug)] 356 pub struct OneIter<'a, 'h> { 357 searcher: &'a One, 358 it: generic::Iter<'h>, 359 } 360 361 impl<'a, 'h> Iterator for OneIter<'a, 'h> { 362 type Item = usize; 363 364 #[inline] next(&mut self) -> Option<usize>365 fn next(&mut self) -> Option<usize> { 366 // SAFETY: We rely on the generic iterator to provide valid start 367 // and end pointers, but we guarantee that any pointer returned by 368 // 'find_raw' falls within the bounds of the start and end pointer. 369 unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } 370 } 371 372 #[inline] count(self) -> usize373 fn count(self) -> usize { 374 self.it.count(|s, e| { 375 // SAFETY: We rely on our generic iterator to return valid start 376 // and end pointers. 377 unsafe { self.searcher.count_raw(s, e) } 378 }) 379 } 380 381 #[inline] size_hint(&self) -> (usize, Option<usize>)382 fn size_hint(&self) -> (usize, Option<usize>) { 383 self.it.size_hint() 384 } 385 } 386 387 impl<'a, 'h> DoubleEndedIterator for OneIter<'a, 'h> { 388 #[inline] next_back(&mut self) -> Option<usize>389 fn next_back(&mut self) -> Option<usize> { 390 // SAFETY: We rely on the generic iterator to provide valid start 391 // and end pointers, but we guarantee that any pointer returned by 392 // 'rfind_raw' falls within the bounds of the start and end pointer. 393 unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } 394 } 395 } 396 397 impl<'a, 'h> core::iter::FusedIterator for OneIter<'a, 'h> {} 398 399 /// Finds all occurrences of two bytes in a haystack. 400 /// 401 /// That is, this reports matches of one of two possible bytes. For example, 402 /// searching for `a` or `b` in `afoobar` would report matches at offsets `0`, 403 /// `4` and `5`. 404 #[derive(Clone, Copy, Debug)] 405 pub struct Two(generic::Two<__m128i>); 406 407 impl Two { 408 /// Create a new searcher that finds occurrences of the needle bytes given. 409 /// 410 /// This particular searcher is specialized to use SSE2 vector instructions 411 /// that typically make it quite fast. 412 /// 413 /// If SSE2 is unavailable in the current environment, then `None` is 414 /// returned. 415 #[inline] new(needle1: u8, needle2: u8) -> Option<Two>416 pub fn new(needle1: u8, needle2: u8) -> Option<Two> { 417 if Two::is_available() { 418 // SAFETY: we check that sse2 is available above. 419 unsafe { Some(Two::new_unchecked(needle1, needle2)) } 420 } else { 421 None 422 } 423 } 424 425 /// Create a new finder specific to SSE2 vectors and routines without 426 /// checking that SSE2 is available. 427 /// 428 /// # Safety 429 /// 430 /// Callers must guarantee that it is safe to execute `sse2` instructions 431 /// in the current environment. 432 /// 433 /// Note that it is a common misconception that if one compiles for an 434 /// `x86_64` target, then they therefore automatically have access to SSE2 435 /// instructions. While this is almost always the case, it isn't true in 436 /// 100% of cases. 437 #[target_feature(enable = "sse2")] 438 #[inline] new_unchecked(needle1: u8, needle2: u8) -> Two439 pub unsafe fn new_unchecked(needle1: u8, needle2: u8) -> Two { 440 Two(generic::Two::new(needle1, needle2)) 441 } 442 443 /// Returns true when this implementation is available in the current 444 /// environment. 445 /// 446 /// When this is true, it is guaranteed that [`Two::new`] will return 447 /// a `Some` value. Similarly, when it is false, it is guaranteed that 448 /// `Two::new` will return a `None` value. 449 /// 450 /// Note also that for the lifetime of a single program, if this returns 451 /// true then it will always return true. 452 #[inline] is_available() -> bool453 pub fn is_available() -> bool { 454 #[cfg(target_feature = "sse2")] 455 { 456 true 457 } 458 #[cfg(not(target_feature = "sse2"))] 459 { 460 false 461 } 462 } 463 464 /// Return the first occurrence of one of the needle bytes in the given 465 /// haystack. If no such occurrence exists, then `None` is returned. 466 /// 467 /// The occurrence is reported as an offset into `haystack`. Its maximum 468 /// value is `haystack.len() - 1`. 469 #[inline] find(&self, haystack: &[u8]) -> Option<usize>470 pub fn find(&self, haystack: &[u8]) -> Option<usize> { 471 // SAFETY: `find_raw` guarantees that if a pointer is returned, it 472 // falls within the bounds of the start and end pointers. 473 unsafe { 474 generic::search_slice_with_raw(haystack, |s, e| { 475 self.find_raw(s, e) 476 }) 477 } 478 } 479 480 /// Return the last occurrence of one of the needle bytes in the given 481 /// haystack. If no such occurrence exists, then `None` is returned. 482 /// 483 /// The occurrence is reported as an offset into `haystack`. Its maximum 484 /// value is `haystack.len() - 1`. 485 #[inline] rfind(&self, haystack: &[u8]) -> Option<usize>486 pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { 487 // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it 488 // falls within the bounds of the start and end pointers. 489 unsafe { 490 generic::search_slice_with_raw(haystack, |s, e| { 491 self.rfind_raw(s, e) 492 }) 493 } 494 } 495 496 /// Like `find`, but accepts and returns raw pointers. 497 /// 498 /// When a match is found, the pointer returned is guaranteed to be 499 /// `>= start` and `< end`. 500 /// 501 /// This routine is useful if you're already using raw pointers and would 502 /// like to avoid converting back to a slice before executing a search. 503 /// 504 /// # Safety 505 /// 506 /// * Both `start` and `end` must be valid for reads. 507 /// * Both `start` and `end` must point to an initialized value. 508 /// * Both `start` and `end` must point to the same allocated object and 509 /// must either be in bounds or at most one byte past the end of the 510 /// allocated object. 511 /// * Both `start` and `end` must be _derived from_ a pointer to the same 512 /// object. 513 /// * The distance between `start` and `end` must not overflow `isize`. 514 /// * The distance being in bounds must not rely on "wrapping around" the 515 /// address space. 516 /// 517 /// Note that callers may pass a pair of pointers such that `start >= end`. 518 /// In that case, `None` will always be returned. 519 #[inline] find_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>520 pub unsafe fn find_raw( 521 &self, 522 start: *const u8, 523 end: *const u8, 524 ) -> Option<*const u8> { 525 if start >= end { 526 return None; 527 } 528 if end.distance(start) < __m128i::BYTES { 529 // SAFETY: We require the caller to pass valid start/end pointers. 530 return generic::fwd_byte_by_byte(start, end, |b| { 531 b == self.0.needle1() || b == self.0.needle2() 532 }); 533 } 534 // SAFETY: Building a `Two` means it's safe to call 'sse2' routines. 535 // Also, we've checked that our haystack is big enough to run on the 536 // vector routine. Pointer validity is caller's responsibility. 537 // 538 // Note that we could call `self.0.find_raw` directly here. But that 539 // means we'd have to annotate this routine with `target_feature`. 540 // Which is fine, because this routine is `unsafe` anyway and the 541 // `target_feature` obligation is met by virtue of building a `Two`. 542 // The real problem is that a routine with a `target_feature` 543 // annotation generally can't be inlined into caller code unless the 544 // caller code has the same target feature annotations. Which is maybe 545 // okay for SSE2, but we do the same thing for AVX2 where caller code 546 // probably usually doesn't have AVX2 enabled. That means that this 547 // routine can be inlined which will handle some of the short-haystack 548 // cases above without touching the architecture specific code. 549 self.find_raw_impl(start, end) 550 } 551 552 /// Like `rfind`, but accepts and returns raw pointers. 553 /// 554 /// When a match is found, the pointer returned is guaranteed to be 555 /// `>= start` and `< end`. 556 /// 557 /// This routine is useful if you're already using raw pointers and would 558 /// like to avoid converting back to a slice before executing a search. 559 /// 560 /// # Safety 561 /// 562 /// * Both `start` and `end` must be valid for reads. 563 /// * Both `start` and `end` must point to an initialized value. 564 /// * Both `start` and `end` must point to the same allocated object and 565 /// must either be in bounds or at most one byte past the end of the 566 /// allocated object. 567 /// * Both `start` and `end` must be _derived from_ a pointer to the same 568 /// object. 569 /// * The distance between `start` and `end` must not overflow `isize`. 570 /// * The distance being in bounds must not rely on "wrapping around" the 571 /// address space. 572 /// 573 /// Note that callers may pass a pair of pointers such that `start >= end`. 574 /// In that case, `None` will always be returned. 575 #[inline] rfind_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>576 pub unsafe fn rfind_raw( 577 &self, 578 start: *const u8, 579 end: *const u8, 580 ) -> Option<*const u8> { 581 if start >= end { 582 return None; 583 } 584 if end.distance(start) < __m128i::BYTES { 585 // SAFETY: We require the caller to pass valid start/end pointers. 586 return generic::rev_byte_by_byte(start, end, |b| { 587 b == self.0.needle1() || b == self.0.needle2() 588 }); 589 } 590 // SAFETY: Building a `Two` means it's safe to call 'sse2' routines. 591 // Also, we've checked that our haystack is big enough to run on the 592 // vector routine. Pointer validity is caller's responsibility. 593 // 594 // See note in forward routine above for why we don't just call 595 // `self.0.rfind_raw` directly here. 596 self.rfind_raw_impl(start, end) 597 } 598 599 /// Execute a search using SSE2 vectors and routines. 600 /// 601 /// # Safety 602 /// 603 /// Same as [`Two::find_raw`], except the distance between `start` and 604 /// `end` must be at least the size of an SSE2 vector (in bytes). 605 /// 606 /// (The target feature safety obligation is automatically fulfilled by 607 /// virtue of being a method on `Two`, which can only be constructed 608 /// when it is safe to call `sse2` routines.) 609 #[target_feature(enable = "sse2")] 610 #[inline] find_raw_impl( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>611 unsafe fn find_raw_impl( 612 &self, 613 start: *const u8, 614 end: *const u8, 615 ) -> Option<*const u8> { 616 self.0.find_raw(start, end) 617 } 618 619 /// Execute a search using SSE2 vectors and routines. 620 /// 621 /// # Safety 622 /// 623 /// Same as [`Two::rfind_raw`], except the distance between `start` and 624 /// `end` must be at least the size of an SSE2 vector (in bytes). 625 /// 626 /// (The target feature safety obligation is automatically fulfilled by 627 /// virtue of being a method on `Two`, which can only be constructed 628 /// when it is safe to call `sse2` routines.) 629 #[target_feature(enable = "sse2")] 630 #[inline] rfind_raw_impl( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>631 unsafe fn rfind_raw_impl( 632 &self, 633 start: *const u8, 634 end: *const u8, 635 ) -> Option<*const u8> { 636 self.0.rfind_raw(start, end) 637 } 638 639 /// Returns an iterator over all occurrences of the needle bytes in the 640 /// given haystack. 641 /// 642 /// The iterator returned implements `DoubleEndedIterator`. This means it 643 /// can also be used to find occurrences in reverse order. 644 #[inline] iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h>645 pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> { 646 TwoIter { searcher: self, it: generic::Iter::new(haystack) } 647 } 648 } 649 650 /// An iterator over all occurrences of two possible bytes in a haystack. 651 /// 652 /// This iterator implements `DoubleEndedIterator`, which means it can also be 653 /// used to find occurrences in reverse order. 654 /// 655 /// This iterator is created by the [`Two::iter`] method. 656 /// 657 /// The lifetime parameters are as follows: 658 /// 659 /// * `'a` refers to the lifetime of the underlying [`Two`] searcher. 660 /// * `'h` refers to the lifetime of the haystack being searched. 661 #[derive(Clone, Debug)] 662 pub struct TwoIter<'a, 'h> { 663 searcher: &'a Two, 664 it: generic::Iter<'h>, 665 } 666 667 impl<'a, 'h> Iterator for TwoIter<'a, 'h> { 668 type Item = usize; 669 670 #[inline] next(&mut self) -> Option<usize>671 fn next(&mut self) -> Option<usize> { 672 // SAFETY: We rely on the generic iterator to provide valid start 673 // and end pointers, but we guarantee that any pointer returned by 674 // 'find_raw' falls within the bounds of the start and end pointer. 675 unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } 676 } 677 678 #[inline] size_hint(&self) -> (usize, Option<usize>)679 fn size_hint(&self) -> (usize, Option<usize>) { 680 self.it.size_hint() 681 } 682 } 683 684 impl<'a, 'h> DoubleEndedIterator for TwoIter<'a, 'h> { 685 #[inline] next_back(&mut self) -> Option<usize>686 fn next_back(&mut self) -> Option<usize> { 687 // SAFETY: We rely on the generic iterator to provide valid start 688 // and end pointers, but we guarantee that any pointer returned by 689 // 'rfind_raw' falls within the bounds of the start and end pointer. 690 unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } 691 } 692 } 693 694 impl<'a, 'h> core::iter::FusedIterator for TwoIter<'a, 'h> {} 695 696 /// Finds all occurrences of three bytes in a haystack. 697 /// 698 /// That is, this reports matches of one of three possible bytes. For example, 699 /// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets 700 /// `0`, `2`, `3`, `4` and `5`. 701 #[derive(Clone, Copy, Debug)] 702 pub struct Three(generic::Three<__m128i>); 703 704 impl Three { 705 /// Create a new searcher that finds occurrences of the needle bytes given. 706 /// 707 /// This particular searcher is specialized to use SSE2 vector instructions 708 /// that typically make it quite fast. 709 /// 710 /// If SSE2 is unavailable in the current environment, then `None` is 711 /// returned. 712 #[inline] new(needle1: u8, needle2: u8, needle3: u8) -> Option<Three>713 pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Option<Three> { 714 if Three::is_available() { 715 // SAFETY: we check that sse2 is available above. 716 unsafe { Some(Three::new_unchecked(needle1, needle2, needle3)) } 717 } else { 718 None 719 } 720 } 721 722 /// Create a new finder specific to SSE2 vectors and routines without 723 /// checking that SSE2 is available. 724 /// 725 /// # Safety 726 /// 727 /// Callers must guarantee that it is safe to execute `sse2` instructions 728 /// in the current environment. 729 /// 730 /// Note that it is a common misconception that if one compiles for an 731 /// `x86_64` target, then they therefore automatically have access to SSE2 732 /// instructions. While this is almost always the case, it isn't true in 733 /// 100% of cases. 734 #[target_feature(enable = "sse2")] 735 #[inline] new_unchecked( needle1: u8, needle2: u8, needle3: u8, ) -> Three736 pub unsafe fn new_unchecked( 737 needle1: u8, 738 needle2: u8, 739 needle3: u8, 740 ) -> Three { 741 Three(generic::Three::new(needle1, needle2, needle3)) 742 } 743 744 /// Returns true when this implementation is available in the current 745 /// environment. 746 /// 747 /// When this is true, it is guaranteed that [`Three::new`] will return 748 /// a `Some` value. Similarly, when it is false, it is guaranteed that 749 /// `Three::new` will return a `None` value. 750 /// 751 /// Note also that for the lifetime of a single program, if this returns 752 /// true then it will always return true. 753 #[inline] is_available() -> bool754 pub fn is_available() -> bool { 755 #[cfg(target_feature = "sse2")] 756 { 757 true 758 } 759 #[cfg(not(target_feature = "sse2"))] 760 { 761 false 762 } 763 } 764 765 /// Return the first occurrence of one of the needle bytes in the given 766 /// haystack. If no such occurrence exists, then `None` is returned. 767 /// 768 /// The occurrence is reported as an offset into `haystack`. Its maximum 769 /// value is `haystack.len() - 1`. 770 #[inline] find(&self, haystack: &[u8]) -> Option<usize>771 pub fn find(&self, haystack: &[u8]) -> Option<usize> { 772 // SAFETY: `find_raw` guarantees that if a pointer is returned, it 773 // falls within the bounds of the start and end pointers. 774 unsafe { 775 generic::search_slice_with_raw(haystack, |s, e| { 776 self.find_raw(s, e) 777 }) 778 } 779 } 780 781 /// Return the last occurrence of one of the needle bytes in the given 782 /// haystack. If no such occurrence exists, then `None` is returned. 783 /// 784 /// The occurrence is reported as an offset into `haystack`. Its maximum 785 /// value is `haystack.len() - 1`. 786 #[inline] rfind(&self, haystack: &[u8]) -> Option<usize>787 pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { 788 // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it 789 // falls within the bounds of the start and end pointers. 790 unsafe { 791 generic::search_slice_with_raw(haystack, |s, e| { 792 self.rfind_raw(s, e) 793 }) 794 } 795 } 796 797 /// Like `find`, but accepts and returns raw pointers. 798 /// 799 /// When a match is found, the pointer returned is guaranteed to be 800 /// `>= start` and `< end`. 801 /// 802 /// This routine is useful if you're already using raw pointers and would 803 /// like to avoid converting back to a slice before executing a search. 804 /// 805 /// # Safety 806 /// 807 /// * Both `start` and `end` must be valid for reads. 808 /// * Both `start` and `end` must point to an initialized value. 809 /// * Both `start` and `end` must point to the same allocated object and 810 /// must either be in bounds or at most one byte past the end of the 811 /// allocated object. 812 /// * Both `start` and `end` must be _derived from_ a pointer to the same 813 /// object. 814 /// * The distance between `start` and `end` must not overflow `isize`. 815 /// * The distance being in bounds must not rely on "wrapping around" the 816 /// address space. 817 /// 818 /// Note that callers may pass a pair of pointers such that `start >= end`. 819 /// In that case, `None` will always be returned. 820 #[inline] find_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>821 pub unsafe fn find_raw( 822 &self, 823 start: *const u8, 824 end: *const u8, 825 ) -> Option<*const u8> { 826 if start >= end { 827 return None; 828 } 829 if end.distance(start) < __m128i::BYTES { 830 // SAFETY: We require the caller to pass valid start/end pointers. 831 return generic::fwd_byte_by_byte(start, end, |b| { 832 b == self.0.needle1() 833 || b == self.0.needle2() 834 || b == self.0.needle3() 835 }); 836 } 837 // SAFETY: Building a `Three` means it's safe to call 'sse2' routines. 838 // Also, we've checked that our haystack is big enough to run on the 839 // vector routine. Pointer validity is caller's responsibility. 840 // 841 // Note that we could call `self.0.find_raw` directly here. But that 842 // means we'd have to annotate this routine with `target_feature`. 843 // Which is fine, because this routine is `unsafe` anyway and the 844 // `target_feature` obligation is met by virtue of building a `Three`. 845 // The real problem is that a routine with a `target_feature` 846 // annotation generally can't be inlined into caller code unless the 847 // caller code has the same target feature annotations. Which is maybe 848 // okay for SSE2, but we do the same thing for AVX2 where caller code 849 // probably usually doesn't have AVX2 enabled. That means that this 850 // routine can be inlined which will handle some of the short-haystack 851 // cases above without touching the architecture specific code. 852 self.find_raw_impl(start, end) 853 } 854 855 /// Like `rfind`, but accepts and returns raw pointers. 856 /// 857 /// When a match is found, the pointer returned is guaranteed to be 858 /// `>= start` and `< end`. 859 /// 860 /// This routine is useful if you're already using raw pointers and would 861 /// like to avoid converting back to a slice before executing a search. 862 /// 863 /// # Safety 864 /// 865 /// * Both `start` and `end` must be valid for reads. 866 /// * Both `start` and `end` must point to an initialized value. 867 /// * Both `start` and `end` must point to the same allocated object and 868 /// must either be in bounds or at most one byte past the end of the 869 /// allocated object. 870 /// * Both `start` and `end` must be _derived from_ a pointer to the same 871 /// object. 872 /// * The distance between `start` and `end` must not overflow `isize`. 873 /// * The distance being in bounds must not rely on "wrapping around" the 874 /// address space. 875 /// 876 /// Note that callers may pass a pair of pointers such that `start >= end`. 877 /// In that case, `None` will always be returned. 878 #[inline] rfind_raw( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>879 pub unsafe fn rfind_raw( 880 &self, 881 start: *const u8, 882 end: *const u8, 883 ) -> Option<*const u8> { 884 if start >= end { 885 return None; 886 } 887 if end.distance(start) < __m128i::BYTES { 888 // SAFETY: We require the caller to pass valid start/end pointers. 889 return generic::rev_byte_by_byte(start, end, |b| { 890 b == self.0.needle1() 891 || b == self.0.needle2() 892 || b == self.0.needle3() 893 }); 894 } 895 // SAFETY: Building a `Three` means it's safe to call 'sse2' routines. 896 // Also, we've checked that our haystack is big enough to run on the 897 // vector routine. Pointer validity is caller's responsibility. 898 // 899 // See note in forward routine above for why we don't just call 900 // `self.0.rfind_raw` directly here. 901 self.rfind_raw_impl(start, end) 902 } 903 904 /// Execute a search using SSE2 vectors and routines. 905 /// 906 /// # Safety 907 /// 908 /// Same as [`Three::find_raw`], except the distance between `start` and 909 /// `end` must be at least the size of an SSE2 vector (in bytes). 910 /// 911 /// (The target feature safety obligation is automatically fulfilled by 912 /// virtue of being a method on `Three`, which can only be constructed 913 /// when it is safe to call `sse2` routines.) 914 #[target_feature(enable = "sse2")] 915 #[inline] find_raw_impl( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>916 unsafe fn find_raw_impl( 917 &self, 918 start: *const u8, 919 end: *const u8, 920 ) -> Option<*const u8> { 921 self.0.find_raw(start, end) 922 } 923 924 /// Execute a search using SSE2 vectors and routines. 925 /// 926 /// # Safety 927 /// 928 /// Same as [`Three::rfind_raw`], except the distance between `start` and 929 /// `end` must be at least the size of an SSE2 vector (in bytes). 930 /// 931 /// (The target feature safety obligation is automatically fulfilled by 932 /// virtue of being a method on `Three`, which can only be constructed 933 /// when it is safe to call `sse2` routines.) 934 #[target_feature(enable = "sse2")] 935 #[inline] rfind_raw_impl( &self, start: *const u8, end: *const u8, ) -> Option<*const u8>936 unsafe fn rfind_raw_impl( 937 &self, 938 start: *const u8, 939 end: *const u8, 940 ) -> Option<*const u8> { 941 self.0.rfind_raw(start, end) 942 } 943 944 /// Returns an iterator over all occurrences of the needle byte in the 945 /// given haystack. 946 /// 947 /// The iterator returned implements `DoubleEndedIterator`. This means it 948 /// can also be used to find occurrences in reverse order. 949 #[inline] iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h>950 pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> { 951 ThreeIter { searcher: self, it: generic::Iter::new(haystack) } 952 } 953 } 954 955 /// An iterator over all occurrences of three possible bytes in a haystack. 956 /// 957 /// This iterator implements `DoubleEndedIterator`, which means it can also be 958 /// used to find occurrences in reverse order. 959 /// 960 /// This iterator is created by the [`Three::iter`] method. 961 /// 962 /// The lifetime parameters are as follows: 963 /// 964 /// * `'a` refers to the lifetime of the underlying [`Three`] searcher. 965 /// * `'h` refers to the lifetime of the haystack being searched. 966 #[derive(Clone, Debug)] 967 pub struct ThreeIter<'a, 'h> { 968 searcher: &'a Three, 969 it: generic::Iter<'h>, 970 } 971 972 impl<'a, 'h> Iterator for ThreeIter<'a, 'h> { 973 type Item = usize; 974 975 #[inline] next(&mut self) -> Option<usize>976 fn next(&mut self) -> Option<usize> { 977 // SAFETY: We rely on the generic iterator to provide valid start 978 // and end pointers, but we guarantee that any pointer returned by 979 // 'find_raw' falls within the bounds of the start and end pointer. 980 unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } 981 } 982 983 #[inline] size_hint(&self) -> (usize, Option<usize>)984 fn size_hint(&self) -> (usize, Option<usize>) { 985 self.it.size_hint() 986 } 987 } 988 989 impl<'a, 'h> DoubleEndedIterator for ThreeIter<'a, 'h> { 990 #[inline] next_back(&mut self) -> Option<usize>991 fn next_back(&mut self) -> Option<usize> { 992 // SAFETY: We rely on the generic iterator to provide valid start 993 // and end pointers, but we guarantee that any pointer returned by 994 // 'rfind_raw' falls within the bounds of the start and end pointer. 995 unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } 996 } 997 } 998 999 impl<'a, 'h> core::iter::FusedIterator for ThreeIter<'a, 'h> {} 1000 1001 #[cfg(test)] 1002 mod tests { 1003 use super::*; 1004 1005 define_memchr_quickcheck!(super); 1006 1007 #[test] forward_one()1008 fn forward_one() { 1009 crate::tests::memchr::Runner::new(1).forward_iter( 1010 |haystack, needles| { 1011 Some(One::new(needles[0])?.iter(haystack).collect()) 1012 }, 1013 ) 1014 } 1015 1016 #[test] reverse_one()1017 fn reverse_one() { 1018 crate::tests::memchr::Runner::new(1).reverse_iter( 1019 |haystack, needles| { 1020 Some(One::new(needles[0])?.iter(haystack).rev().collect()) 1021 }, 1022 ) 1023 } 1024 1025 #[test] count_one()1026 fn count_one() { 1027 crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| { 1028 Some(One::new(needles[0])?.iter(haystack).count()) 1029 }) 1030 } 1031 1032 #[test] forward_two()1033 fn forward_two() { 1034 crate::tests::memchr::Runner::new(2).forward_iter( 1035 |haystack, needles| { 1036 let n1 = needles.get(0).copied()?; 1037 let n2 = needles.get(1).copied()?; 1038 Some(Two::new(n1, n2)?.iter(haystack).collect()) 1039 }, 1040 ) 1041 } 1042 1043 #[test] reverse_two()1044 fn reverse_two() { 1045 crate::tests::memchr::Runner::new(2).reverse_iter( 1046 |haystack, needles| { 1047 let n1 = needles.get(0).copied()?; 1048 let n2 = needles.get(1).copied()?; 1049 Some(Two::new(n1, n2)?.iter(haystack).rev().collect()) 1050 }, 1051 ) 1052 } 1053 1054 #[test] forward_three()1055 fn forward_three() { 1056 crate::tests::memchr::Runner::new(3).forward_iter( 1057 |haystack, needles| { 1058 let n1 = needles.get(0).copied()?; 1059 let n2 = needles.get(1).copied()?; 1060 let n3 = needles.get(2).copied()?; 1061 Some(Three::new(n1, n2, n3)?.iter(haystack).collect()) 1062 }, 1063 ) 1064 } 1065 1066 #[test] reverse_three()1067 fn reverse_three() { 1068 crate::tests::memchr::Runner::new(3).reverse_iter( 1069 |haystack, needles| { 1070 let n1 = needles.get(0).copied()?; 1071 let n2 = needles.get(1).copied()?; 1072 let n3 = needles.get(2).copied()?; 1073 Some(Three::new(n1, n2, n3)?.iter(haystack).rev().collect()) 1074 }, 1075 ) 1076 } 1077 } 1078