1 use alloc::{boxed::Box, vec, vec::Vec};
2 
3 /// A set of "packed pair" test seeds. Each seed serves as the base for the
4 /// generation of many other tests. In essence, the seed captures the pair of
5 /// bytes we used for a predicate and first byte among our needle. The tests
6 /// generated from each seed essentially vary the length of the needle and
7 /// haystack, while using the rare/first byte configuration from the seed.
8 ///
9 /// The purpose of this is to test many different needle/haystack lengths.
10 /// In particular, some of the vector optimizations might only have bugs
11 /// in haystacks of a certain size.
12 const SEEDS: &[Seed] = &[
13     // Why not use different 'first' bytes? It seemed like a good idea to be
14     // able to configure it, but when I wrote the test generator below, it
15     // didn't seem necessary to use for reasons that I forget.
16     Seed { first: b'x', index1: b'y', index2: b'z' },
17     Seed { first: b'x', index1: b'x', index2: b'z' },
18     Seed { first: b'x', index1: b'y', index2: b'x' },
19     Seed { first: b'x', index1: b'x', index2: b'x' },
20     Seed { first: b'x', index1: b'y', index2: b'y' },
21 ];
22 
23 /// Runs a host of "packed pair" search tests.
24 ///
25 /// These tests specifically look for the occurrence of a possible substring
26 /// match based on a pair of bytes matching at the right offsets.
27 pub(crate) struct Runner {
28     fwd: Option<
29         Box<
30             dyn FnMut(&[u8], &[u8], u8, u8) -> Option<Option<usize>> + 'static,
31         >,
32     >,
33 }
34 
35 impl Runner {
36     /// Create a new test runner for "packed pair" substring search.
new() -> Runner37     pub(crate) fn new() -> Runner {
38         Runner { fwd: None }
39     }
40 
41     /// Run all tests. This panics on the first failure.
42     ///
43     /// If the implementation being tested returns `None` for a particular
44     /// haystack/needle combination, then that test is skipped.
45     ///
46     /// This runs tests on both the forward and reverse implementations given.
47     /// If either (or both) are missing, then tests for that implementation are
48     /// skipped.
run(self)49     pub(crate) fn run(self) {
50         if let Some(mut fwd) = self.fwd {
51             for seed in SEEDS.iter() {
52                 for t in seed.generate() {
53                     match fwd(&t.haystack, &t.needle, t.index1, t.index2) {
54                         None => continue,
55                         Some(result) => {
56                             assert_eq!(
57                                 t.fwd, result,
58                                 "FORWARD, needle: {:?}, haystack: {:?}, \
59                                  index1: {:?}, index2: {:?}",
60                                 t.needle, t.haystack, t.index1, t.index2,
61                             )
62                         }
63                     }
64                 }
65             }
66         }
67     }
68 
69     /// Set the implementation for forward "packed pair" substring search.
70     ///
71     /// If the closure returns `None`, then it is assumed that the given
72     /// test cannot be applied to the particular implementation and it is
73     /// skipped. For example, if a particular implementation only supports
74     /// needles or haystacks for some minimum length.
75     ///
76     /// If this is not set, then forward "packed pair" search is not tested.
fwd( mut self, search: impl FnMut(&[u8], &[u8], u8, u8) -> Option<Option<usize>> + 'static, ) -> Runner77     pub(crate) fn fwd(
78         mut self,
79         search: impl FnMut(&[u8], &[u8], u8, u8) -> Option<Option<usize>> + 'static,
80     ) -> Runner {
81         self.fwd = Some(Box::new(search));
82         self
83     }
84 }
85 
86 /// A test that represents the input and expected output to a "packed pair"
87 /// search function. The test should be able to run with any "packed pair"
88 /// implementation and get the expected output.
89 struct Test {
90     haystack: Vec<u8>,
91     needle: Vec<u8>,
92     index1: u8,
93     index2: u8,
94     fwd: Option<usize>,
95 }
96 
97 impl Test {
98     /// Create a new "packed pair" test from a seed and some given offsets to
99     /// the pair of bytes to use as a predicate in the seed's needle.
100     ///
101     /// If a valid test could not be constructed, then None is returned.
102     /// (Currently, we take the approach of massaging tests to be valid
103     /// instead of rejecting them outright.)
new( seed: Seed, index1: usize, index2: usize, haystack_len: usize, needle_len: usize, fwd: Option<usize>, ) -> Option<Test>104     fn new(
105         seed: Seed,
106         index1: usize,
107         index2: usize,
108         haystack_len: usize,
109         needle_len: usize,
110         fwd: Option<usize>,
111     ) -> Option<Test> {
112         let mut index1: u8 = index1.try_into().unwrap();
113         let mut index2: u8 = index2.try_into().unwrap();
114         // The '#' byte is never used in a haystack (unless we're expecting
115         // a match), while the '@' byte is never used in a needle.
116         let mut haystack = vec![b'@'; haystack_len];
117         let mut needle = vec![b'#'; needle_len];
118         needle[0] = seed.first;
119         needle[index1 as usize] = seed.index1;
120         needle[index2 as usize] = seed.index2;
121         // If we're expecting a match, then make sure the needle occurs
122         // in the haystack at the expected position.
123         if let Some(i) = fwd {
124             haystack[i..i + needle.len()].copy_from_slice(&needle);
125         }
126         // If the operations above lead to rare offsets pointing to the
127         // non-first occurrence of a byte, then adjust it. This might lead
128         // to redundant tests, but it's simpler than trying to change the
129         // generation process I think.
130         if let Some(i) = crate::memchr(seed.index1, &needle) {
131             index1 = u8::try_from(i).unwrap();
132         }
133         if let Some(i) = crate::memchr(seed.index2, &needle) {
134             index2 = u8::try_from(i).unwrap();
135         }
136         Some(Test { haystack, needle, index1, index2, fwd })
137     }
138 }
139 
140 /// Data that describes a single prefilter test seed.
141 #[derive(Clone, Copy)]
142 struct Seed {
143     first: u8,
144     index1: u8,
145     index2: u8,
146 }
147 
148 impl Seed {
149     const NEEDLE_LENGTH_LIMIT: usize = {
150         #[cfg(not(miri))]
151         {
152             33
153         }
154         #[cfg(miri)]
155         {
156             5
157         }
158     };
159 
160     const HAYSTACK_LENGTH_LIMIT: usize = {
161         #[cfg(not(miri))]
162         {
163             65
164         }
165         #[cfg(miri)]
166         {
167             8
168         }
169     };
170 
171     /// Generate a series of prefilter tests from this seed.
generate(self) -> impl Iterator<Item = Test>172     fn generate(self) -> impl Iterator<Item = Test> {
173         let len_start = 2;
174         // The iterator below generates *a lot* of tests. The number of
175         // tests was chosen somewhat empirically to be "bearable" when
176         // running the test suite.
177         //
178         // We use an iterator here because the collective haystacks of all
179         // these test cases add up to enough memory to OOM a conservative
180         // sandbox or a small laptop.
181         (len_start..=Seed::NEEDLE_LENGTH_LIMIT).flat_map(move |needle_len| {
182             let index_start = len_start - 1;
183             (index_start..needle_len).flat_map(move |index1| {
184                 (index1..needle_len).flat_map(move |index2| {
185                     (needle_len..=Seed::HAYSTACK_LENGTH_LIMIT).flat_map(
186                         move |haystack_len| {
187                             Test::new(
188                                 self,
189                                 index1,
190                                 index2,
191                                 haystack_len,
192                                 needle_len,
193                                 None,
194                             )
195                             .into_iter()
196                             .chain(
197                                 (0..=(haystack_len - needle_len)).flat_map(
198                                     move |output| {
199                                         Test::new(
200                                             self,
201                                             index1,
202                                             index2,
203                                             haystack_len,
204                                             needle_len,
205                                             Some(output),
206                                         )
207                                     },
208                                 ),
209                             )
210                         },
211                     )
212                 })
213             })
214         })
215     }
216 }
217