1 /*! 2 A 128-bit vector implementation of the "packed pair" SIMD algorithm. 3 4 The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main 5 difference is that it (by default) uses a background distribution of byte 6 frequencies to heuristically select the pair of bytes to search for. 7 8 [generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last 9 */ 10 11 use core::arch::wasm32::v128; 12 13 use crate::arch::{all::packedpair::Pair, generic::packedpair}; 14 15 /// A "packed pair" finder that uses 128-bit vector operations. 16 /// 17 /// This finder picks two bytes that it believes have high predictive power 18 /// for indicating an overall match of a needle. Depending on whether 19 /// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets 20 /// where the needle matches or could match. In the prefilter case, candidates 21 /// are reported whenever the [`Pair`] of bytes given matches. 22 #[derive(Clone, Copy, Debug)] 23 pub struct Finder(packedpair::Finder<v128>); 24 25 impl Finder { 26 /// Create a new pair searcher. The searcher returned can either report 27 /// exact matches of `needle` or act as a prefilter and report candidate 28 /// positions of `needle`. 29 /// 30 /// If simd128 is unavailable in the current environment or if a [`Pair`] 31 /// could not be constructed from the needle given, then `None` is 32 /// returned. 33 #[inline] new(needle: &[u8]) -> Option<Finder>34 pub fn new(needle: &[u8]) -> Option<Finder> { 35 Finder::with_pair(needle, Pair::new(needle)?) 36 } 37 38 /// Create a new "packed pair" finder using the pair of bytes given. 39 /// 40 /// This constructor permits callers to control precisely which pair of 41 /// bytes is used as a predicate. 42 /// 43 /// If simd128 is unavailable in the current environment, then `None` is 44 /// returned. 45 #[inline] with_pair(needle: &[u8], pair: Pair) -> Option<Finder>46 pub fn with_pair(needle: &[u8], pair: Pair) -> Option<Finder> { 47 if Finder::is_available() { 48 // SAFETY: we check that simd128 is available above. We are also 49 // guaranteed to have needle.len() > 1 because we have a valid 50 // Pair. 51 unsafe { Some(Finder::with_pair_impl(needle, pair)) } 52 } else { 53 None 54 } 55 } 56 57 /// Create a new `Finder` specific to simd128 vectors and routines. 58 /// 59 /// # Safety 60 /// 61 /// Same as the safety for `packedpair::Finder::new`, and callers must also 62 /// ensure that simd128 is available. 63 #[target_feature(enable = "simd128")] 64 #[inline] with_pair_impl(needle: &[u8], pair: Pair) -> Finder65 unsafe fn with_pair_impl(needle: &[u8], pair: Pair) -> Finder { 66 let finder = packedpair::Finder::<v128>::new(needle, pair); 67 Finder(finder) 68 } 69 70 /// Returns true when this implementation is available in the current 71 /// environment. 72 /// 73 /// When this is true, it is guaranteed that [`Finder::with_pair`] will 74 /// return a `Some` value. Similarly, when it is false, it is guaranteed 75 /// that `Finder::with_pair` will return a `None` value. Notice that this 76 /// does not guarantee that [`Finder::new`] will return a `Finder`. Namely, 77 /// even when `Finder::is_available` is true, it is not guaranteed that a 78 /// valid [`Pair`] can be found from the needle given. 79 /// 80 /// Note also that for the lifetime of a single program, if this returns 81 /// true then it will always return true. 82 #[inline] is_available() -> bool83 pub fn is_available() -> bool { 84 // We used to gate on `cfg(target_feature = "simd128")` here, but 85 // we've since required the feature to be enabled at compile time to 86 // even include this module at all. Therefore, it is always enabled 87 // in this context. See the linked issue for why this was changed. 88 // 89 // Ref: https://github.com/BurntSushi/memchr/issues/144 90 true 91 } 92 93 /// Execute a search using wasm32 v128 vectors and routines. 94 /// 95 /// # Panics 96 /// 97 /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. 98 #[inline] find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize>99 pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> { 100 self.find_impl(haystack, needle) 101 } 102 103 /// Execute a search using wasm32 v128 vectors and routines. 104 /// 105 /// # Panics 106 /// 107 /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. 108 #[inline] find_prefilter(&self, haystack: &[u8]) -> Option<usize>109 pub fn find_prefilter(&self, haystack: &[u8]) -> Option<usize> { 110 self.find_prefilter_impl(haystack) 111 } 112 113 /// Execute a search using wasm32 v128 vectors and routines. 114 /// 115 /// # Panics 116 /// 117 /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. 118 /// 119 /// # Safety 120 /// 121 /// (The target feature safety obligation is automatically fulfilled by 122 /// virtue of being a method on `Finder`, which can only be constructed 123 /// when it is safe to call `simd128` routines.) 124 #[target_feature(enable = "simd128")] 125 #[inline] find_impl(&self, haystack: &[u8], needle: &[u8]) -> Option<usize>126 fn find_impl(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> { 127 // SAFETY: The target feature safety obligation is automatically 128 // fulfilled by virtue of being a method on `Finder`, which can only be 129 // constructed when it is safe to call `simd128` routines. 130 unsafe { self.0.find(haystack, needle) } 131 } 132 133 /// Execute a prefilter search using wasm32 v128 vectors and routines. 134 /// 135 /// # Panics 136 /// 137 /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. 138 /// 139 /// # Safety 140 /// 141 /// (The target feature safety obligation is automatically fulfilled by 142 /// virtue of being a method on `Finder`, which can only be constructed 143 /// when it is safe to call `simd128` routines.) 144 #[target_feature(enable = "simd128")] 145 #[inline] find_prefilter_impl(&self, haystack: &[u8]) -> Option<usize>146 fn find_prefilter_impl(&self, haystack: &[u8]) -> Option<usize> { 147 // SAFETY: The target feature safety obligation is automatically 148 // fulfilled by virtue of being a method on `Finder`, which can only be 149 // constructed when it is safe to call `simd128` routines. 150 unsafe { self.0.find_prefilter(haystack) } 151 } 152 153 /// Returns the pair of offsets (into the needle) used to check as a 154 /// predicate before confirming whether a needle exists at a particular 155 /// position. 156 #[inline] pair(&self) -> &Pair157 pub fn pair(&self) -> &Pair { 158 self.0.pair() 159 } 160 161 /// Returns the minimum haystack length that this `Finder` can search. 162 /// 163 /// Using a haystack with length smaller than this in a search will result 164 /// in a panic. The reason for this restriction is that this finder is 165 /// meant to be a low-level component that is part of a larger substring 166 /// strategy. In that sense, it avoids trying to handle all cases and 167 /// instead only handles the cases that it can handle very well. 168 #[inline] min_haystack_len(&self) -> usize169 pub fn min_haystack_len(&self) -> usize { 170 self.0.min_haystack_len() 171 } 172 } 173 174 #[cfg(test)] 175 mod tests { 176 use super::*; 177 find(haystack: &[u8], needle: &[u8]) -> Option<Option<usize>>178 fn find(haystack: &[u8], needle: &[u8]) -> Option<Option<usize>> { 179 let f = Finder::new(needle)?; 180 if haystack.len() < f.min_haystack_len() { 181 return None; 182 } 183 Some(f.find(haystack, needle)) 184 } 185 186 define_substring_forward_quickcheck!(find); 187 188 #[test] forward_substring()189 fn forward_substring() { 190 crate::tests::substring::Runner::new().fwd(find).run() 191 } 192 193 #[test] forward_packedpair()194 fn forward_packedpair() { 195 fn find( 196 haystack: &[u8], 197 needle: &[u8], 198 index1: u8, 199 index2: u8, 200 ) -> Option<Option<usize>> { 201 let pair = Pair::with_indices(needle, index1, index2)?; 202 let f = Finder::with_pair(needle, pair)?; 203 if haystack.len() < f.min_haystack_len() { 204 return None; 205 } 206 Some(f.find(haystack, needle)) 207 } 208 crate::tests::packedpair::Runner::new().fwd(find).run() 209 } 210 211 #[test] forward_packedpair_prefilter()212 fn forward_packedpair_prefilter() { 213 fn find( 214 haystack: &[u8], 215 needle: &[u8], 216 index1: u8, 217 index2: u8, 218 ) -> Option<Option<usize>> { 219 let pair = Pair::with_indices(needle, index1, index2)?; 220 let f = Finder::with_pair(needle, pair)?; 221 if haystack.len() < f.min_haystack_len() { 222 return None; 223 } 224 Some(f.find_prefilter(haystack)) 225 } 226 crate::tests::packedpair::Runner::new().fwd(find).run() 227 } 228 } 229