1 /*! 2 An implementation of the [Shift-Or substring search algorithm][shiftor]. 3 4 [shiftor]: https://en.wikipedia.org/wiki/Bitap_algorithm 5 */ 6 7 use alloc::boxed::Box; 8 9 /// The type of our mask. 10 /// 11 /// While we don't expose anyway to configure this in the public API, if one 12 /// really needs less memory usage or support for longer needles, then it is 13 /// suggested to copy the code from this module and modify it to fit your 14 /// needs. The code below is written to be correct regardless of whether Mask 15 /// is a u8, u16, u32, u64 or u128. 16 type Mask = u16; 17 18 /// A forward substring searcher using the Shift-Or algorithm. 19 #[derive(Debug)] 20 pub struct Finder { 21 masks: Box<[Mask; 256]>, 22 needle_len: usize, 23 } 24 25 impl Finder { 26 const MAX_NEEDLE_LEN: usize = (Mask::BITS - 1) as usize; 27 28 /// Create a new Shift-Or forward searcher for the given `needle`. 29 /// 30 /// The needle may be empty. The empty needle matches at every byte offset. 31 #[inline] new(needle: &[u8]) -> Option<Finder>32 pub fn new(needle: &[u8]) -> Option<Finder> { 33 let needle_len = needle.len(); 34 if needle_len > Finder::MAX_NEEDLE_LEN { 35 // A match is found when bit 7 is set in 'result' in the search 36 // routine below. So our needle can't be bigger than 7. We could 37 // permit bigger needles by using u16, u32 or u64 for our mask 38 // entries. But this is all we need for this example. 39 return None; 40 } 41 let mut searcher = Finder { masks: Box::from([!0; 256]), needle_len }; 42 for (i, &byte) in needle.iter().enumerate() { 43 searcher.masks[usize::from(byte)] &= !(1 << i); 44 } 45 Some(searcher) 46 } 47 48 /// Return the first occurrence of the needle given to `Finder::new` in 49 /// the `haystack` given. If no such occurrence exists, then `None` is 50 /// returned. 51 /// 52 /// Unlike most other substring search implementations in this crate, this 53 /// finder does not require passing the needle at search time. A match can 54 /// be determined without the needle at all since the required information 55 /// is already encoded into this finder at construction time. 56 /// 57 /// The maximum value this can return is `haystack.len()`, which can only 58 /// occur when the needle and haystack both have length zero. Otherwise, 59 /// for non-empty haystacks, the maximum value is `haystack.len() - 1`. 60 #[inline] find(&self, haystack: &[u8]) -> Option<usize>61 pub fn find(&self, haystack: &[u8]) -> Option<usize> { 62 if self.needle_len == 0 { 63 return Some(0); 64 } 65 let mut result = !1; 66 for (i, &byte) in haystack.iter().enumerate() { 67 result |= self.masks[usize::from(byte)]; 68 result <<= 1; 69 if result & (1 << self.needle_len) == 0 { 70 return Some(i + 1 - self.needle_len); 71 } 72 } 73 None 74 } 75 } 76 77 #[cfg(test)] 78 mod tests { 79 use super::*; 80 81 define_substring_forward_quickcheck!(|h, n| Some(Finder::new(n)?.find(h))); 82 83 #[test] forward()84 fn forward() { 85 crate::tests::substring::Runner::new() 86 .fwd(|h, n| Some(Finder::new(n)?.find(h))) 87 .run(); 88 } 89 } 90