1 /*! 2 A 256-bit vector implementation of the "packed pair" SIMD algorithm. 3 4 The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main 5 difference is that it (by default) uses a background distribution of byte 6 frequencies to heuristically select the pair of bytes to search for. 7 8 [generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last 9 */ 10 11 use core::arch::x86_64::{__m128i, __m256i}; 12 13 use crate::arch::{all::packedpair::Pair, generic::packedpair}; 14 15 /// A "packed pair" finder that uses 256-bit vector operations. 16 /// 17 /// This finder picks two bytes that it believes have high predictive power 18 /// for indicating an overall match of a needle. Depending on whether 19 /// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets 20 /// where the needle matches or could match. In the prefilter case, candidates 21 /// are reported whenever the [`Pair`] of bytes given matches. 22 #[derive(Clone, Copy, Debug)] 23 pub struct Finder { 24 sse2: packedpair::Finder<__m128i>, 25 avx2: packedpair::Finder<__m256i>, 26 } 27 28 impl Finder { 29 /// Create a new pair searcher. The searcher returned can either report 30 /// exact matches of `needle` or act as a prefilter and report candidate 31 /// positions of `needle`. 32 /// 33 /// If AVX2 is unavailable in the current environment or if a [`Pair`] 34 /// could not be constructed from the needle given, then `None` is 35 /// returned. 36 #[inline] new(needle: &[u8]) -> Option<Finder>37 pub fn new(needle: &[u8]) -> Option<Finder> { 38 Finder::with_pair(needle, Pair::new(needle)?) 39 } 40 41 /// Create a new "packed pair" finder using the pair of bytes given. 42 /// 43 /// This constructor permits callers to control precisely which pair of 44 /// bytes is used as a predicate. 45 /// 46 /// If AVX2 is unavailable in the current environment, then `None` is 47 /// returned. 48 #[inline] with_pair(needle: &[u8], pair: Pair) -> Option<Finder>49 pub fn with_pair(needle: &[u8], pair: Pair) -> Option<Finder> { 50 if Finder::is_available() { 51 // SAFETY: we check that sse2/avx2 is available above. We are also 52 // guaranteed to have needle.len() > 1 because we have a valid 53 // Pair. 54 unsafe { Some(Finder::with_pair_impl(needle, pair)) } 55 } else { 56 None 57 } 58 } 59 60 /// Create a new `Finder` specific to SSE2 vectors and routines. 61 /// 62 /// # Safety 63 /// 64 /// Same as the safety for `packedpair::Finder::new`, and callers must also 65 /// ensure that both SSE2 and AVX2 are available. 66 #[target_feature(enable = "sse2", enable = "avx2")] 67 #[inline] with_pair_impl(needle: &[u8], pair: Pair) -> Finder68 unsafe fn with_pair_impl(needle: &[u8], pair: Pair) -> Finder { 69 let sse2 = packedpair::Finder::<__m128i>::new(needle, pair); 70 let avx2 = packedpair::Finder::<__m256i>::new(needle, pair); 71 Finder { sse2, avx2 } 72 } 73 74 /// Returns true when this implementation is available in the current 75 /// environment. 76 /// 77 /// When this is true, it is guaranteed that [`Finder::with_pair`] will 78 /// return a `Some` value. Similarly, when it is false, it is guaranteed 79 /// that `Finder::with_pair` will return a `None` value. Notice that this 80 /// does not guarantee that [`Finder::new`] will return a `Finder`. Namely, 81 /// even when `Finder::is_available` is true, it is not guaranteed that a 82 /// valid [`Pair`] can be found from the needle given. 83 /// 84 /// Note also that for the lifetime of a single program, if this returns 85 /// true then it will always return true. 86 #[inline] is_available() -> bool87 pub fn is_available() -> bool { 88 #[cfg(not(target_feature = "sse2"))] 89 { 90 false 91 } 92 #[cfg(target_feature = "sse2")] 93 { 94 #[cfg(target_feature = "avx2")] 95 { 96 true 97 } 98 #[cfg(not(target_feature = "avx2"))] 99 { 100 #[cfg(feature = "std")] 101 { 102 std::is_x86_feature_detected!("avx2") 103 } 104 #[cfg(not(feature = "std"))] 105 { 106 false 107 } 108 } 109 } 110 } 111 112 /// Execute a search using AVX2 vectors and routines. 113 /// 114 /// # Panics 115 /// 116 /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. 117 #[inline] find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize>118 pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> { 119 // SAFETY: Building a `Finder` means it's safe to call 'sse2' routines. 120 unsafe { self.find_impl(haystack, needle) } 121 } 122 123 /// Run this finder on the given haystack as a prefilter. 124 /// 125 /// If a candidate match is found, then an offset where the needle *could* 126 /// begin in the haystack is returned. 127 /// 128 /// # Panics 129 /// 130 /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. 131 #[inline] find_prefilter(&self, haystack: &[u8]) -> Option<usize>132 pub fn find_prefilter(&self, haystack: &[u8]) -> Option<usize> { 133 // SAFETY: Building a `Finder` means it's safe to call 'sse2' routines. 134 unsafe { self.find_prefilter_impl(haystack) } 135 } 136 137 /// Execute a search using AVX2 vectors and routines. 138 /// 139 /// # Panics 140 /// 141 /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. 142 /// 143 /// # Safety 144 /// 145 /// (The target feature safety obligation is automatically fulfilled by 146 /// virtue of being a method on `Finder`, which can only be constructed 147 /// when it is safe to call `sse2` and `avx2` routines.) 148 #[target_feature(enable = "sse2", enable = "avx2")] 149 #[inline] find_impl( &self, haystack: &[u8], needle: &[u8], ) -> Option<usize>150 unsafe fn find_impl( 151 &self, 152 haystack: &[u8], 153 needle: &[u8], 154 ) -> Option<usize> { 155 if haystack.len() < self.avx2.min_haystack_len() { 156 self.sse2.find(haystack, needle) 157 } else { 158 self.avx2.find(haystack, needle) 159 } 160 } 161 162 /// Execute a prefilter search using AVX2 vectors and routines. 163 /// 164 /// # Panics 165 /// 166 /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. 167 /// 168 /// # Safety 169 /// 170 /// (The target feature safety obligation is automatically fulfilled by 171 /// virtue of being a method on `Finder`, which can only be constructed 172 /// when it is safe to call `sse2` and `avx2` routines.) 173 #[target_feature(enable = "sse2", enable = "avx2")] 174 #[inline] find_prefilter_impl(&self, haystack: &[u8]) -> Option<usize>175 unsafe fn find_prefilter_impl(&self, haystack: &[u8]) -> Option<usize> { 176 if haystack.len() < self.avx2.min_haystack_len() { 177 self.sse2.find_prefilter(haystack) 178 } else { 179 self.avx2.find_prefilter(haystack) 180 } 181 } 182 183 /// Returns the pair of offsets (into the needle) used to check as a 184 /// predicate before confirming whether a needle exists at a particular 185 /// position. 186 #[inline] pair(&self) -> &Pair187 pub fn pair(&self) -> &Pair { 188 self.avx2.pair() 189 } 190 191 /// Returns the minimum haystack length that this `Finder` can search. 192 /// 193 /// Using a haystack with length smaller than this in a search will result 194 /// in a panic. The reason for this restriction is that this finder is 195 /// meant to be a low-level component that is part of a larger substring 196 /// strategy. In that sense, it avoids trying to handle all cases and 197 /// instead only handles the cases that it can handle very well. 198 #[inline] min_haystack_len(&self) -> usize199 pub fn min_haystack_len(&self) -> usize { 200 // The caller doesn't need to care about AVX2's min_haystack_len 201 // since this implementation will automatically switch to the SSE2 202 // implementation if the haystack is too short for AVX2. Therefore, the 203 // caller only needs to care about SSE2's min_haystack_len. 204 // 205 // This does assume that SSE2's min_haystack_len is less than or 206 // equal to AVX2's min_haystack_len. In practice, this is true and 207 // there is no way it could be false based on how this Finder is 208 // implemented. Namely, both SSE2 and AVX2 use the same `Pair`. If 209 // they used different pairs, then it's possible (although perhaps 210 // pathological) for SSE2's min_haystack_len to be bigger than AVX2's. 211 self.sse2.min_haystack_len() 212 } 213 } 214 215 #[cfg(test)] 216 mod tests { 217 use super::*; 218 find(haystack: &[u8], needle: &[u8]) -> Option<Option<usize>>219 fn find(haystack: &[u8], needle: &[u8]) -> Option<Option<usize>> { 220 let f = Finder::new(needle)?; 221 if haystack.len() < f.min_haystack_len() { 222 return None; 223 } 224 Some(f.find(haystack, needle)) 225 } 226 227 define_substring_forward_quickcheck!(find); 228 229 #[test] forward_substring()230 fn forward_substring() { 231 crate::tests::substring::Runner::new().fwd(find).run() 232 } 233 234 #[test] forward_packedpair()235 fn forward_packedpair() { 236 fn find( 237 haystack: &[u8], 238 needle: &[u8], 239 index1: u8, 240 index2: u8, 241 ) -> Option<Option<usize>> { 242 let pair = Pair::with_indices(needle, index1, index2)?; 243 let f = Finder::with_pair(needle, pair)?; 244 if haystack.len() < f.min_haystack_len() { 245 return None; 246 } 247 Some(f.find(haystack, needle)) 248 } 249 crate::tests::packedpair::Runner::new().fwd(find).run() 250 } 251 252 #[test] forward_packedpair_prefilter()253 fn forward_packedpair_prefilter() { 254 fn find( 255 haystack: &[u8], 256 needle: &[u8], 257 index1: u8, 258 index2: u8, 259 ) -> Option<Option<usize>> { 260 if !cfg!(target_feature = "sse2") { 261 return None; 262 } 263 let pair = Pair::with_indices(needle, index1, index2)?; 264 let f = Finder::with_pair(needle, pair)?; 265 if haystack.len() < f.min_haystack_len() { 266 return None; 267 } 268 Some(f.find_prefilter(haystack)) 269 } 270 crate::tests::packedpair::Runner::new().fwd(find).run() 271 } 272 } 273