1 /*!
2 A 128-bit vector implementation of the "packed pair" SIMD algorithm.
3 
4 The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main
5 difference is that it (by default) uses a background distribution of byte
6 frequencies to heuristically select the pair of bytes to search for.
7 
8 [generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last
9 */
10 
11 use core::arch::wasm32::v128;
12 
13 use crate::arch::{all::packedpair::Pair, generic::packedpair};
14 
/// A "packed pair" finder that uses 128-bit vector operations.
///
/// This finder picks two bytes that it believes have high predictive power
/// for indicating an overall match of a needle. Depending on whether
/// [`Finder::find`] or [`Finder::find_prefilter`] is used, it reports offsets
/// where the needle matches or could match. In the prefilter case, candidates
/// are reported whenever the [`Pair`] of bytes given matches.
///
/// This type is a thin wrapper around the generic `packedpair::Finder`,
/// instantiated with the wasm32 `v128` vector type.
#[derive(Clone, Copy, Debug)]
pub struct Finder(packedpair::Finder<v128>);
24 
25 impl Finder {
26     /// Create a new pair searcher. The searcher returned can either report
27     /// exact matches of `needle` or act as a prefilter and report candidate
28     /// positions of `needle`.
29     ///
30     /// If simd128 is unavailable in the current environment or if a [`Pair`]
31     /// could not be constructed from the needle given, then `None` is
32     /// returned.
33     #[inline]
new(needle: &[u8]) -> Option<Finder>34     pub fn new(needle: &[u8]) -> Option<Finder> {
35         Finder::with_pair(needle, Pair::new(needle)?)
36     }
37 
38     /// Create a new "packed pair" finder using the pair of bytes given.
39     ///
40     /// This constructor permits callers to control precisely which pair of
41     /// bytes is used as a predicate.
42     ///
43     /// If simd128 is unavailable in the current environment, then `None` is
44     /// returned.
45     #[inline]
with_pair(needle: &[u8], pair: Pair) -> Option<Finder>46     pub fn with_pair(needle: &[u8], pair: Pair) -> Option<Finder> {
47         if Finder::is_available() {
48             // SAFETY: we check that simd128 is available above. We are also
49             // guaranteed to have needle.len() > 1 because we have a valid
50             // Pair.
51             unsafe { Some(Finder::with_pair_impl(needle, pair)) }
52         } else {
53             None
54         }
55     }
56 
57     /// Create a new `Finder` specific to simd128 vectors and routines.
58     ///
59     /// # Safety
60     ///
61     /// Same as the safety for `packedpair::Finder::new`, and callers must also
62     /// ensure that simd128 is available.
63     #[target_feature(enable = "simd128")]
64     #[inline]
with_pair_impl(needle: &[u8], pair: Pair) -> Finder65     unsafe fn with_pair_impl(needle: &[u8], pair: Pair) -> Finder {
66         let finder = packedpair::Finder::<v128>::new(needle, pair);
67         Finder(finder)
68     }
69 
    /// Returns true when this implementation is available in the current
    /// environment.
    ///
    /// When this is true, it is guaranteed that [`Finder::with_pair`] will
    /// return a `Some` value. Similarly, when it is false, it is guaranteed
    /// that `Finder::with_pair` will return a `None` value. Notice that this
    /// does not guarantee that [`Finder::new`] will return a `Finder`. Namely,
    /// even when `Finder::is_available` is true, it is not guaranteed that a
    /// valid [`Pair`] can be found from the needle given.
    ///
    /// Note also that for the lifetime of a single program, if this returns
    /// true then it will always return true. As currently written, this
    /// function unconditionally returns `true`; the comment in the body
    /// explains why no runtime or `cfg` check is performed.
    #[inline]
    pub fn is_available() -> bool {
        // We used to gate on `cfg(target_feature = "simd128")` here, but
        // we've since required the feature to be enabled at compile time to
        // even include this module at all. Therefore, it is always enabled
        // in this context. See the linked issue for why this was changed.
        //
        // Ref: https://github.com/BurntSushi/memchr/issues/144
        true
    }
92 
93     /// Execute a search using wasm32 v128 vectors and routines.
94     ///
95     /// # Panics
96     ///
97     /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
98     #[inline]
find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize>99     pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> {
100         self.find_impl(haystack, needle)
101     }
102 
103     /// Execute a search using wasm32 v128 vectors and routines.
104     ///
105     /// # Panics
106     ///
107     /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
108     #[inline]
find_prefilter(&self, haystack: &[u8]) -> Option<usize>109     pub fn find_prefilter(&self, haystack: &[u8]) -> Option<usize> {
110         self.find_prefilter_impl(haystack)
111     }
112 
113     /// Execute a search using wasm32 v128 vectors and routines.
114     ///
115     /// # Panics
116     ///
117     /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
118     ///
119     /// # Safety
120     ///
121     /// (The target feature safety obligation is automatically fulfilled by
122     /// virtue of being a method on `Finder`, which can only be constructed
123     /// when it is safe to call `simd128` routines.)
124     #[target_feature(enable = "simd128")]
125     #[inline]
find_impl(&self, haystack: &[u8], needle: &[u8]) -> Option<usize>126     fn find_impl(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> {
127         // SAFETY: The target feature safety obligation is automatically
128         // fulfilled by virtue of being a method on `Finder`, which can only be
129         // constructed when it is safe to call `simd128` routines.
130         unsafe { self.0.find(haystack, needle) }
131     }
132 
133     /// Execute a prefilter search using wasm32 v128 vectors and routines.
134     ///
135     /// # Panics
136     ///
137     /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
138     ///
139     /// # Safety
140     ///
141     /// (The target feature safety obligation is automatically fulfilled by
142     /// virtue of being a method on `Finder`, which can only be constructed
143     /// when it is safe to call `simd128` routines.)
144     #[target_feature(enable = "simd128")]
145     #[inline]
find_prefilter_impl(&self, haystack: &[u8]) -> Option<usize>146     fn find_prefilter_impl(&self, haystack: &[u8]) -> Option<usize> {
147         // SAFETY: The target feature safety obligation is automatically
148         // fulfilled by virtue of being a method on `Finder`, which can only be
149         // constructed when it is safe to call `simd128` routines.
150         unsafe { self.0.find_prefilter(haystack) }
151     }
152 
153     /// Returns the pair of offsets (into the needle) used to check as a
154     /// predicate before confirming whether a needle exists at a particular
155     /// position.
156     #[inline]
pair(&self) -> &Pair157     pub fn pair(&self) -> &Pair {
158         self.0.pair()
159     }
160 
161     /// Returns the minimum haystack length that this `Finder` can search.
162     ///
163     /// Using a haystack with length smaller than this in a search will result
164     /// in a panic. The reason for this restriction is that this finder is
165     /// meant to be a low-level component that is part of a larger substring
166     /// strategy. In that sense, it avoids trying to handle all cases and
167     /// instead only handles the cases that it can handle very well.
168     #[inline]
min_haystack_len(&self) -> usize169     pub fn min_haystack_len(&self) -> usize {
170         self.0.min_haystack_len()
171     }
172 }
173 
#[cfg(test)]
mod tests {
    use super::*;

    /// Search helper handed to the shared substring test harnesses.
    ///
    /// The outer `Option` is `None` when no `Finder` can be built for
    /// `needle` or when `haystack` is shorter than the finder's minimum
    /// haystack length; otherwise it wraps the result of `Finder::find`.
    fn find(haystack: &[u8], needle: &[u8]) -> Option<Option<usize>> {
        let finder = Finder::new(needle)?;
        if haystack.len() >= finder.min_haystack_len() {
            Some(finder.find(haystack, needle))
        } else {
            None
        }
    }

    define_substring_forward_quickcheck!(find);

    /// Runs the shared forward substring tests against `find`.
    #[test]
    fn forward_substring() {
        use crate::tests::substring::Runner;

        Runner::new().fwd(find).run()
    }

    /// Runs the shared packed pair tests using exact searches with an
    /// explicitly chosen pair of needle indices.
    #[test]
    fn forward_packedpair() {
        use crate::tests::packedpair::Runner;

        fn find_exact(
            haystack: &[u8],
            needle: &[u8],
            i1: u8,
            i2: u8,
        ) -> Option<Option<usize>> {
            let chosen = Pair::with_indices(needle, i1, i2)?;
            let finder = Finder::with_pair(needle, chosen)?;
            if haystack.len() >= finder.min_haystack_len() {
                Some(finder.find(haystack, needle))
            } else {
                None
            }
        }

        Runner::new().fwd(find_exact).run()
    }

    /// Runs the shared packed pair tests using prefilter (candidate)
    /// searches with an explicitly chosen pair of needle indices.
    #[test]
    fn forward_packedpair_prefilter() {
        use crate::tests::packedpair::Runner;

        fn find_candidate(
            haystack: &[u8],
            needle: &[u8],
            i1: u8,
            i2: u8,
        ) -> Option<Option<usize>> {
            let chosen = Pair::with_indices(needle, i1, i2)?;
            let finder = Finder::with_pair(needle, chosen)?;
            if haystack.len() >= finder.min_haystack_len() {
                Some(finder.find_prefilter(haystack))
            } else {
                None
            }
        }

        Runner::new().fwd(find_candidate).run()
    }
}
229