xref: /aosp_15_r20/external/cronet/third_party/rust/chromium_crates_io/vendor/memchr-2.7.2/src/tests/memchr/mod.rs (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 use alloc::{
2     string::{String, ToString},
3     vec,
4     vec::Vec,
5 };
6 
7 use crate::ext::Byte;
8 
9 pub(crate) mod naive;
10 #[macro_use]
11 pub(crate) mod prop;
12 
13 const SEEDS: &'static [Seed] = &[
14     Seed { haystack: "a", needles: &[b'a'], positions: &[0] },
15     Seed { haystack: "aa", needles: &[b'a'], positions: &[0, 1] },
16     Seed { haystack: "aaa", needles: &[b'a'], positions: &[0, 1, 2] },
17     Seed { haystack: "", needles: &[b'a'], positions: &[] },
18     Seed { haystack: "z", needles: &[b'a'], positions: &[] },
19     Seed { haystack: "zz", needles: &[b'a'], positions: &[] },
20     Seed { haystack: "zza", needles: &[b'a'], positions: &[2] },
21     Seed { haystack: "zaza", needles: &[b'a'], positions: &[1, 3] },
22     Seed { haystack: "zzza", needles: &[b'a'], positions: &[3] },
23     Seed { haystack: "\x00a", needles: &[b'a'], positions: &[1] },
24     Seed { haystack: "\x00", needles: &[b'\x00'], positions: &[0] },
25     Seed { haystack: "\x00\x00", needles: &[b'\x00'], positions: &[0, 1] },
26     Seed { haystack: "\x00a\x00", needles: &[b'\x00'], positions: &[0, 2] },
27     Seed { haystack: "zzzzzzzzzzzzzzzza", needles: &[b'a'], positions: &[16] },
28     Seed {
29         haystack: "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzza",
30         needles: &[b'a'],
31         positions: &[32],
32     },
33     // two needles (applied to memchr2 + memchr3)
34     Seed { haystack: "az", needles: &[b'a', b'z'], positions: &[0, 1] },
35     Seed { haystack: "az", needles: &[b'a', b'z'], positions: &[0, 1] },
36     Seed { haystack: "az", needles: &[b'x', b'y'], positions: &[] },
37     Seed { haystack: "az", needles: &[b'a', b'y'], positions: &[0] },
38     Seed { haystack: "az", needles: &[b'x', b'z'], positions: &[1] },
39     Seed { haystack: "yyyyaz", needles: &[b'a', b'z'], positions: &[4, 5] },
40     Seed { haystack: "yyyyaz", needles: &[b'z', b'a'], positions: &[4, 5] },
41     // three needles (applied to memchr3)
42     Seed {
43         haystack: "xyz",
44         needles: &[b'x', b'y', b'z'],
45         positions: &[0, 1, 2],
46     },
47     Seed {
48         haystack: "zxy",
49         needles: &[b'x', b'y', b'z'],
50         positions: &[0, 1, 2],
51     },
52     Seed { haystack: "zxy", needles: &[b'x', b'a', b'z'], positions: &[0, 1] },
53     Seed { haystack: "zxy", needles: &[b't', b'a', b'z'], positions: &[0] },
54     Seed { haystack: "yxz", needles: &[b't', b'a', b'z'], positions: &[2] },
55 ];
56 
57 /// Runs a host of substring search tests.
58 ///
59 /// This has support for "partial" substring search implementations only work
60 /// for a subset of needles/haystacks. For example, the "packed pair" substring
61 /// search implementation only works for haystacks of some minimum length based
62 /// of the pair of bytes selected and the size of the vector used.
63 pub(crate) struct Runner {
64     needle_len: usize,
65 }
66 
67 impl Runner {
68     /// Create a new test runner for forward and reverse byte search
69     /// implementations.
70     ///
71     /// The `needle_len` given must be at most `3` and at least `1`. It
72     /// corresponds to the number of needle bytes to search for.
new(needle_len: usize) -> Runner73     pub(crate) fn new(needle_len: usize) -> Runner {
74         assert!(needle_len >= 1, "needle_len must be at least 1");
75         assert!(needle_len <= 3, "needle_len must be at most 3");
76         Runner { needle_len }
77     }
78 
79     /// Run all tests. This panics on the first failure.
80     ///
81     /// If the implementation being tested returns `None` for a particular
82     /// haystack/needle combination, then that test is skipped.
forward_iter<F>(self, mut test: F) where F: FnMut(&[u8], &[u8]) -> Option<Vec<usize>> + 'static,83     pub(crate) fn forward_iter<F>(self, mut test: F)
84     where
85         F: FnMut(&[u8], &[u8]) -> Option<Vec<usize>> + 'static,
86     {
87         for seed in SEEDS.iter() {
88             if seed.needles.len() > self.needle_len {
89                 continue;
90             }
91             for t in seed.generate() {
92                 let results = match test(t.haystack.as_bytes(), &t.needles) {
93                     None => continue,
94                     Some(results) => results,
95                 };
96                 assert_eq!(
97                     t.expected,
98                     results,
99                     "needles: {:?}, haystack: {:?}",
100                     t.needles
101                         .iter()
102                         .map(|&b| b.to_char())
103                         .collect::<Vec<char>>(),
104                     t.haystack,
105                 );
106             }
107         }
108     }
109 
110     /// Run all tests in the reverse direction. This panics on the first
111     /// failure.
112     ///
113     /// If the implementation being tested returns `None` for a particular
114     /// haystack/needle combination, then that test is skipped.
reverse_iter<F>(self, mut test: F) where F: FnMut(&[u8], &[u8]) -> Option<Vec<usize>> + 'static,115     pub(crate) fn reverse_iter<F>(self, mut test: F)
116     where
117         F: FnMut(&[u8], &[u8]) -> Option<Vec<usize>> + 'static,
118     {
119         for seed in SEEDS.iter() {
120             if seed.needles.len() > self.needle_len {
121                 continue;
122             }
123             for t in seed.generate() {
124                 let mut results = match test(t.haystack.as_bytes(), &t.needles)
125                 {
126                     None => continue,
127                     Some(results) => results,
128                 };
129                 results.reverse();
130                 assert_eq!(
131                     t.expected,
132                     results,
133                     "needles: {:?}, haystack: {:?}",
134                     t.needles
135                         .iter()
136                         .map(|&b| b.to_char())
137                         .collect::<Vec<char>>(),
138                     t.haystack,
139                 );
140             }
141         }
142     }
143 
144     /// Run all tests as counting tests. This panics on the first failure.
145     ///
146     /// That is, this only checks that the number of matches is correct and
147     /// not whether the offsets of each match are.
count_iter<F>(self, mut test: F) where F: FnMut(&[u8], &[u8]) -> Option<usize> + 'static,148     pub(crate) fn count_iter<F>(self, mut test: F)
149     where
150         F: FnMut(&[u8], &[u8]) -> Option<usize> + 'static,
151     {
152         for seed in SEEDS.iter() {
153             if seed.needles.len() > self.needle_len {
154                 continue;
155             }
156             for t in seed.generate() {
157                 let got = match test(t.haystack.as_bytes(), &t.needles) {
158                     None => continue,
159                     Some(got) => got,
160                 };
161                 assert_eq!(
162                     t.expected.len(),
163                     got,
164                     "needles: {:?}, haystack: {:?}",
165                     t.needles
166                         .iter()
167                         .map(|&b| b.to_char())
168                         .collect::<Vec<char>>(),
169                     t.haystack,
170                 );
171             }
172         }
173     }
174 
175     /// Like `Runner::forward`, but for a function that returns only the next
176     /// match and not all matches.
177     ///
178     /// If the function returns `None`, then it is skipped.
forward_oneshot<F>(self, mut test: F) where F: FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static,179     pub(crate) fn forward_oneshot<F>(self, mut test: F)
180     where
181         F: FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static,
182     {
183         self.forward_iter(move |haystack, needles| {
184             let mut start = 0;
185             let mut results = vec![];
186             while let Some(i) = test(&haystack[start..], needles)? {
187                 results.push(start + i);
188                 start += i + 1;
189             }
190             Some(results)
191         })
192     }
193 
194     /// Like `Runner::reverse`, but for a function that returns only the last
195     /// match and not all matches.
196     ///
197     /// If the function returns `None`, then it is skipped.
reverse_oneshot<F>(self, mut test: F) where F: FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static,198     pub(crate) fn reverse_oneshot<F>(self, mut test: F)
199     where
200         F: FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static,
201     {
202         self.reverse_iter(move |haystack, needles| {
203             let mut end = haystack.len();
204             let mut results = vec![];
205             while let Some(i) = test(&haystack[..end], needles)? {
206                 results.push(i);
207                 end = i;
208             }
209             Some(results)
210         })
211     }
212 }
213 
214 /// A single test for memr?chr{,2,3}.
215 #[derive(Clone, Debug)]
216 struct Test {
217     /// The string to search in.
218     haystack: String,
219     /// The needles to look for.
220     needles: Vec<u8>,
221     /// The offsets that are expected to be found for all needles in the
222     /// forward direction.
223     expected: Vec<usize>,
224 }
225 
226 impl Test {
new(seed: &Seed) -> Test227     fn new(seed: &Seed) -> Test {
228         Test {
229             haystack: seed.haystack.to_string(),
230             needles: seed.needles.to_vec(),
231             expected: seed.positions.to_vec(),
232         }
233     }
234 }
235 
236 /// Data that can be expanded into many memchr tests by padding out the corpus.
237 #[derive(Clone, Debug)]
238 struct Seed {
239     /// The thing to search. We use `&str` instead of `&[u8]` because they
240     /// are nicer to write in tests, and we don't miss much since memchr
241     /// doesn't care about UTF-8.
242     ///
243     /// Corpora cannot contain either '%' or '#'. We use these bytes when
244     /// expanding test cases into many test cases, and we assume they are not
245     /// used. If they are used, `memchr_tests` will panic.
246     haystack: &'static str,
247     /// The needles to search for. This is intended to be an alternation of
248     /// needles. The number of needles may cause this test to be skipped for
249     /// some memchr variants. For example, a test with 2 needles cannot be used
250     /// to test `memchr`, but can be used to test `memchr2` and `memchr3`.
251     /// However, a test with only 1 needle can be used to test all of `memchr`,
252     /// `memchr2` and `memchr3`. We achieve this by filling in the needles with
253     /// bytes that we never used in the corpus (such as '#').
254     needles: &'static [u8],
255     /// The positions expected to match for all of the needles.
256     positions: &'static [usize],
257 }
258 
259 impl Seed {
260     /// Controls how much we expand the haystack on either side for each test.
261     /// We lower this on Miri because otherwise running the tests would take
262     /// forever.
263     const EXPAND_LEN: usize = {
264         #[cfg(not(miri))]
265         {
266             515
267         }
268         #[cfg(miri)]
269         {
270             6
271         }
272     };
273 
274     /// Expand this test into many variations of the same test.
275     ///
276     /// In particular, this will generate more tests with larger corpus sizes.
277     /// The expected positions are updated to maintain the integrity of the
278     /// test.
279     ///
280     /// This is important in testing a memchr implementation, because there are
281     /// often different cases depending on the length of the corpus.
282     ///
283     /// Note that we extend the corpus by adding `%` bytes, which we
284     /// don't otherwise use as a needle.
generate(&self) -> impl Iterator<Item = Test>285     fn generate(&self) -> impl Iterator<Item = Test> {
286         let mut more = vec![];
287 
288         // Add bytes to the start of the corpus.
289         for i in 0..Seed::EXPAND_LEN {
290             let mut t = Test::new(self);
291             let mut new: String = core::iter::repeat('%').take(i).collect();
292             new.push_str(&t.haystack);
293             t.haystack = new;
294             t.expected = t.expected.into_iter().map(|p| p + i).collect();
295             more.push(t);
296         }
297         // Add bytes to the end of the corpus.
298         for i in 1..Seed::EXPAND_LEN {
299             let mut t = Test::new(self);
300             let padding: String = core::iter::repeat('%').take(i).collect();
301             t.haystack.push_str(&padding);
302             more.push(t);
303         }
304 
305         more.into_iter()
306     }
307 }
308