1 /*! 2 A 128-bit vector implementation of the "packed pair" SIMD algorithm. 3 4 The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main 5 difference is that it (by default) uses a background distribution of byte 6 frequencies to heuristically select the pair of bytes to search for. 7 8 [generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last 9 */ 10 11 use core::arch::aarch64::uint8x16_t; 12 13 use crate::arch::{all::packedpair::Pair, generic::packedpair}; 14 15 /// A "packed pair" finder that uses 128-bit vector operations. 16 /// 17 /// This finder picks two bytes that it believes have high predictive power 18 /// for indicating an overall match of a needle. Depending on whether 19 /// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets 20 /// where the needle matches or could match. In the prefilter case, candidates 21 /// are reported whenever the [`Pair`] of bytes given matches. 22 #[derive(Clone, Copy, Debug)] 23 pub struct Finder(packedpair::Finder<uint8x16_t>); 24 25 /// A "packed pair" finder that uses 128-bit vector operations. 26 /// 27 /// This finder picks two bytes that it believes have high predictive power 28 /// for indicating an overall match of a needle. Depending on whether 29 /// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets 30 /// where the needle matches or could match. In the prefilter case, candidates 31 /// are reported whenever the [`Pair`] of bytes given matches. 32 impl Finder { 33 /// Create a new pair searcher. The searcher returned can either report 34 /// exact matches of `needle` or act as a prefilter and report candidate 35 /// positions of `needle`. 36 /// 37 /// If neon is unavailable in the current environment or if a [`Pair`] 38 /// could not be constructed from the needle given, then `None` is 39 /// returned. 40 #[inline] new(needle: &[u8]) -> Option<Finder>41 pub fn new(needle: &[u8]) -> Option<Finder> { 42 Finder::with_pair(needle, Pair::new(needle)?) 43 } 44 45 /// Create a new "packed pair" finder using the pair of bytes given. 46 /// 47 /// This constructor permits callers to control precisely which pair of 48 /// bytes is used as a predicate. 49 /// 50 /// If neon is unavailable in the current environment, then `None` is 51 /// returned. 52 #[inline] with_pair(needle: &[u8], pair: Pair) -> Option<Finder>53 pub fn with_pair(needle: &[u8], pair: Pair) -> Option<Finder> { 54 if Finder::is_available() { 55 // SAFETY: we check that sse2 is available above. We are also 56 // guaranteed to have needle.len() > 1 because we have a valid 57 // Pair. 58 unsafe { Some(Finder::with_pair_impl(needle, pair)) } 59 } else { 60 None 61 } 62 } 63 64 /// Create a new `Finder` specific to neon vectors and routines. 65 /// 66 /// # Safety 67 /// 68 /// Same as the safety for `packedpair::Finder::new`, and callers must also 69 /// ensure that neon is available. 70 #[target_feature(enable = "neon")] 71 #[inline] with_pair_impl(needle: &[u8], pair: Pair) -> Finder72 unsafe fn with_pair_impl(needle: &[u8], pair: Pair) -> Finder { 73 let finder = packedpair::Finder::<uint8x16_t>::new(needle, pair); 74 Finder(finder) 75 } 76 77 /// Returns true when this implementation is available in the current 78 /// environment. 79 /// 80 /// When this is true, it is guaranteed that [`Finder::with_pair`] will 81 /// return a `Some` value. Similarly, when it is false, it is guaranteed 82 /// that `Finder::with_pair` will return a `None` value. Notice that this 83 /// does not guarantee that [`Finder::new`] will return a `Finder`. Namely, 84 /// even when `Finder::is_available` is true, it is not guaranteed that a 85 /// valid [`Pair`] can be found from the needle given. 86 /// 87 /// Note also that for the lifetime of a single program, if this returns 88 /// true then it will always return true. 89 #[inline] is_available() -> bool90 pub fn is_available() -> bool { 91 #[cfg(target_feature = "neon")] 92 { 93 true 94 } 95 #[cfg(not(target_feature = "neon"))] 96 { 97 false 98 } 99 } 100 101 /// Execute a search using neon vectors and routines. 102 /// 103 /// # Panics 104 /// 105 /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. 106 #[inline] find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize>107 pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> { 108 // SAFETY: Building a `Finder` means it's safe to call 'neon' routines. 109 unsafe { self.find_impl(haystack, needle) } 110 } 111 112 /// Execute a search using neon vectors and routines. 113 /// 114 /// # Panics 115 /// 116 /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. 117 #[inline] find_prefilter(&self, haystack: &[u8]) -> Option<usize>118 pub fn find_prefilter(&self, haystack: &[u8]) -> Option<usize> { 119 // SAFETY: Building a `Finder` means it's safe to call 'neon' routines. 120 unsafe { self.find_prefilter_impl(haystack) } 121 } 122 123 /// Execute a search using neon vectors and routines. 124 /// 125 /// # Panics 126 /// 127 /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. 128 /// 129 /// # Safety 130 /// 131 /// (The target feature safety obligation is automatically fulfilled by 132 /// virtue of being a method on `Finder`, which can only be constructed 133 /// when it is safe to call `neon` routines.) 134 #[target_feature(enable = "neon")] 135 #[inline] find_impl( &self, haystack: &[u8], needle: &[u8], ) -> Option<usize>136 unsafe fn find_impl( 137 &self, 138 haystack: &[u8], 139 needle: &[u8], 140 ) -> Option<usize> { 141 self.0.find(haystack, needle) 142 } 143 144 /// Execute a prefilter search using neon vectors and routines. 145 /// 146 /// # Panics 147 /// 148 /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. 149 /// 150 /// # Safety 151 /// 152 /// (The target feature safety obligation is automatically fulfilled by 153 /// virtue of being a method on `Finder`, which can only be constructed 154 /// when it is safe to call `neon` routines.) 155 #[target_feature(enable = "neon")] 156 #[inline] find_prefilter_impl(&self, haystack: &[u8]) -> Option<usize>157 unsafe fn find_prefilter_impl(&self, haystack: &[u8]) -> Option<usize> { 158 self.0.find_prefilter(haystack) 159 } 160 161 /// Returns the pair of offsets (into the needle) used to check as a 162 /// predicate before confirming whether a needle exists at a particular 163 /// position. 164 #[inline] pair(&self) -> &Pair165 pub fn pair(&self) -> &Pair { 166 self.0.pair() 167 } 168 169 /// Returns the minimum haystack length that this `Finder` can search. 170 /// 171 /// Using a haystack with length smaller than this in a search will result 172 /// in a panic. The reason for this restriction is that this finder is 173 /// meant to be a low-level component that is part of a larger substring 174 /// strategy. In that sense, it avoids trying to handle all cases and 175 /// instead only handles the cases that it can handle very well. 176 #[inline] min_haystack_len(&self) -> usize177 pub fn min_haystack_len(&self) -> usize { 178 self.0.min_haystack_len() 179 } 180 } 181 182 #[cfg(test)] 183 mod tests { 184 use super::*; 185 find(haystack: &[u8], needle: &[u8]) -> Option<Option<usize>>186 fn find(haystack: &[u8], needle: &[u8]) -> Option<Option<usize>> { 187 let f = Finder::new(needle)?; 188 if haystack.len() < f.min_haystack_len() { 189 return None; 190 } 191 Some(f.find(haystack, needle)) 192 } 193 194 define_substring_forward_quickcheck!(find); 195 196 #[test] forward_substring()197 fn forward_substring() { 198 crate::tests::substring::Runner::new().fwd(find).run() 199 } 200 201 #[test] forward_packedpair()202 fn forward_packedpair() { 203 fn find( 204 haystack: &[u8], 205 needle: &[u8], 206 index1: u8, 207 index2: u8, 208 ) -> Option<Option<usize>> { 209 let pair = Pair::with_indices(needle, index1, index2)?; 210 let f = Finder::with_pair(needle, pair)?; 211 if haystack.len() < f.min_haystack_len() { 212 return None; 213 } 214 Some(f.find(haystack, needle)) 215 } 216 crate::tests::packedpair::Runner::new().fwd(find).run() 217 } 218 219 #[test] forward_packedpair_prefilter()220 fn forward_packedpair_prefilter() { 221 fn find( 222 haystack: &[u8], 223 needle: &[u8], 224 index1: u8, 225 index2: u8, 226 ) -> Option<Option<usize>> { 227 let pair = Pair::with_indices(needle, index1, index2)?; 228 let f = Finder::with_pair(needle, pair)?; 229 if haystack.len() < f.min_haystack_len() { 230 return None; 231 } 232 Some(f.find_prefilter(haystack)) 233 } 234 crate::tests::packedpair::Runner::new().fwd(find).run() 235 } 236 } 237