1 use alloc::{ 2 string::{String, ToString}, 3 vec, 4 vec::Vec, 5 }; 6 7 use crate::ext::Byte; 8 9 pub(crate) mod naive; 10 #[macro_use] 11 pub(crate) mod prop; 12 13 const SEEDS: &'static [Seed] = &[ 14 Seed { haystack: "a", needles: &[b'a'], positions: &[0] }, 15 Seed { haystack: "aa", needles: &[b'a'], positions: &[0, 1] }, 16 Seed { haystack: "aaa", needles: &[b'a'], positions: &[0, 1, 2] }, 17 Seed { haystack: "", needles: &[b'a'], positions: &[] }, 18 Seed { haystack: "z", needles: &[b'a'], positions: &[] }, 19 Seed { haystack: "zz", needles: &[b'a'], positions: &[] }, 20 Seed { haystack: "zza", needles: &[b'a'], positions: &[2] }, 21 Seed { haystack: "zaza", needles: &[b'a'], positions: &[1, 3] }, 22 Seed { haystack: "zzza", needles: &[b'a'], positions: &[3] }, 23 Seed { haystack: "\x00a", needles: &[b'a'], positions: &[1] }, 24 Seed { haystack: "\x00", needles: &[b'\x00'], positions: &[0] }, 25 Seed { haystack: "\x00\x00", needles: &[b'\x00'], positions: &[0, 1] }, 26 Seed { haystack: "\x00a\x00", needles: &[b'\x00'], positions: &[0, 2] }, 27 Seed { haystack: "zzzzzzzzzzzzzzzza", needles: &[b'a'], positions: &[16] }, 28 Seed { 29 haystack: "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzza", 30 needles: &[b'a'], 31 positions: &[32], 32 }, 33 // two needles (applied to memchr2 + memchr3) 34 Seed { haystack: "az", needles: &[b'a', b'z'], positions: &[0, 1] }, 35 Seed { haystack: "az", needles: &[b'a', b'z'], positions: &[0, 1] }, 36 Seed { haystack: "az", needles: &[b'x', b'y'], positions: &[] }, 37 Seed { haystack: "az", needles: &[b'a', b'y'], positions: &[0] }, 38 Seed { haystack: "az", needles: &[b'x', b'z'], positions: &[1] }, 39 Seed { haystack: "yyyyaz", needles: &[b'a', b'z'], positions: &[4, 5] }, 40 Seed { haystack: "yyyyaz", needles: &[b'z', b'a'], positions: &[4, 5] }, 41 // three needles (applied to memchr3) 42 Seed { 43 haystack: "xyz", 44 needles: &[b'x', b'y', b'z'], 45 positions: &[0, 1, 2], 46 }, 47 Seed { 48 haystack: "zxy", 49 needles: &[b'x', b'y', b'z'], 50 positions: &[0, 1, 2], 51 }, 52 Seed { haystack: "zxy", needles: &[b'x', b'a', b'z'], positions: &[0, 1] }, 53 Seed { haystack: "zxy", needles: &[b't', b'a', b'z'], positions: &[0] }, 54 Seed { haystack: "yxz", needles: &[b't', b'a', b'z'], positions: &[2] }, 55 ]; 56 57 /// Runs a host of substring search tests. 58 /// 59 /// This has support for "partial" substring search implementations only work 60 /// for a subset of needles/haystacks. For example, the "packed pair" substring 61 /// search implementation only works for haystacks of some minimum length based 62 /// of the pair of bytes selected and the size of the vector used. 63 pub(crate) struct Runner { 64 needle_len: usize, 65 } 66 67 impl Runner { 68 /// Create a new test runner for forward and reverse byte search 69 /// implementations. 70 /// 71 /// The `needle_len` given must be at most `3` and at least `1`. It 72 /// corresponds to the number of needle bytes to search for. new(needle_len: usize) -> Runner73 pub(crate) fn new(needle_len: usize) -> Runner { 74 assert!(needle_len >= 1, "needle_len must be at least 1"); 75 assert!(needle_len <= 3, "needle_len must be at most 3"); 76 Runner { needle_len } 77 } 78 79 /// Run all tests. This panics on the first failure. 80 /// 81 /// If the implementation being tested returns `None` for a particular 82 /// haystack/needle combination, then that test is skipped. forward_iter<F>(self, mut test: F) where F: FnMut(&[u8], &[u8]) -> Option<Vec<usize>> + 'static,83 pub(crate) fn forward_iter<F>(self, mut test: F) 84 where 85 F: FnMut(&[u8], &[u8]) -> Option<Vec<usize>> + 'static, 86 { 87 for seed in SEEDS.iter() { 88 if seed.needles.len() > self.needle_len { 89 continue; 90 } 91 for t in seed.generate() { 92 let results = match test(t.haystack.as_bytes(), &t.needles) { 93 None => continue, 94 Some(results) => results, 95 }; 96 assert_eq!( 97 t.expected, 98 results, 99 "needles: {:?}, haystack: {:?}", 100 t.needles 101 .iter() 102 .map(|&b| b.to_char()) 103 .collect::<Vec<char>>(), 104 t.haystack, 105 ); 106 } 107 } 108 } 109 110 /// Run all tests in the reverse direction. This panics on the first 111 /// failure. 112 /// 113 /// If the implementation being tested returns `None` for a particular 114 /// haystack/needle combination, then that test is skipped. reverse_iter<F>(self, mut test: F) where F: FnMut(&[u8], &[u8]) -> Option<Vec<usize>> + 'static,115 pub(crate) fn reverse_iter<F>(self, mut test: F) 116 where 117 F: FnMut(&[u8], &[u8]) -> Option<Vec<usize>> + 'static, 118 { 119 for seed in SEEDS.iter() { 120 if seed.needles.len() > self.needle_len { 121 continue; 122 } 123 for t in seed.generate() { 124 let mut results = match test(t.haystack.as_bytes(), &t.needles) 125 { 126 None => continue, 127 Some(results) => results, 128 }; 129 results.reverse(); 130 assert_eq!( 131 t.expected, 132 results, 133 "needles: {:?}, haystack: {:?}", 134 t.needles 135 .iter() 136 .map(|&b| b.to_char()) 137 .collect::<Vec<char>>(), 138 t.haystack, 139 ); 140 } 141 } 142 } 143 144 /// Run all tests as counting tests. This panics on the first failure. 145 /// 146 /// That is, this only checks that the number of matches is correct and 147 /// not whether the offsets of each match are. count_iter<F>(self, mut test: F) where F: FnMut(&[u8], &[u8]) -> Option<usize> + 'static,148 pub(crate) fn count_iter<F>(self, mut test: F) 149 where 150 F: FnMut(&[u8], &[u8]) -> Option<usize> + 'static, 151 { 152 for seed in SEEDS.iter() { 153 if seed.needles.len() > self.needle_len { 154 continue; 155 } 156 for t in seed.generate() { 157 let got = match test(t.haystack.as_bytes(), &t.needles) { 158 None => continue, 159 Some(got) => got, 160 }; 161 assert_eq!( 162 t.expected.len(), 163 got, 164 "needles: {:?}, haystack: {:?}", 165 t.needles 166 .iter() 167 .map(|&b| b.to_char()) 168 .collect::<Vec<char>>(), 169 t.haystack, 170 ); 171 } 172 } 173 } 174 175 /// Like `Runner::forward`, but for a function that returns only the next 176 /// match and not all matches. 177 /// 178 /// If the function returns `None`, then it is skipped. forward_oneshot<F>(self, mut test: F) where F: FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static,179 pub(crate) fn forward_oneshot<F>(self, mut test: F) 180 where 181 F: FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static, 182 { 183 self.forward_iter(move |haystack, needles| { 184 let mut start = 0; 185 let mut results = vec![]; 186 while let Some(i) = test(&haystack[start..], needles)? { 187 results.push(start + i); 188 start += i + 1; 189 } 190 Some(results) 191 }) 192 } 193 194 /// Like `Runner::reverse`, but for a function that returns only the last 195 /// match and not all matches. 196 /// 197 /// If the function returns `None`, then it is skipped. reverse_oneshot<F>(self, mut test: F) where F: FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static,198 pub(crate) fn reverse_oneshot<F>(self, mut test: F) 199 where 200 F: FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static, 201 { 202 self.reverse_iter(move |haystack, needles| { 203 let mut end = haystack.len(); 204 let mut results = vec![]; 205 while let Some(i) = test(&haystack[..end], needles)? { 206 results.push(i); 207 end = i; 208 } 209 Some(results) 210 }) 211 } 212 } 213 214 /// A single test for memr?chr{,2,3}. 215 #[derive(Clone, Debug)] 216 struct Test { 217 /// The string to search in. 218 haystack: String, 219 /// The needles to look for. 220 needles: Vec<u8>, 221 /// The offsets that are expected to be found for all needles in the 222 /// forward direction. 223 expected: Vec<usize>, 224 } 225 226 impl Test { new(seed: &Seed) -> Test227 fn new(seed: &Seed) -> Test { 228 Test { 229 haystack: seed.haystack.to_string(), 230 needles: seed.needles.to_vec(), 231 expected: seed.positions.to_vec(), 232 } 233 } 234 } 235 236 /// Data that can be expanded into many memchr tests by padding out the corpus. 237 #[derive(Clone, Debug)] 238 struct Seed { 239 /// The thing to search. We use `&str` instead of `&[u8]` because they 240 /// are nicer to write in tests, and we don't miss much since memchr 241 /// doesn't care about UTF-8. 242 /// 243 /// Corpora cannot contain either '%' or '#'. We use these bytes when 244 /// expanding test cases into many test cases, and we assume they are not 245 /// used. If they are used, `memchr_tests` will panic. 246 haystack: &'static str, 247 /// The needles to search for. This is intended to be an alternation of 248 /// needles. The number of needles may cause this test to be skipped for 249 /// some memchr variants. For example, a test with 2 needles cannot be used 250 /// to test `memchr`, but can be used to test `memchr2` and `memchr3`. 251 /// However, a test with only 1 needle can be used to test all of `memchr`, 252 /// `memchr2` and `memchr3`. We achieve this by filling in the needles with 253 /// bytes that we never used in the corpus (such as '#'). 254 needles: &'static [u8], 255 /// The positions expected to match for all of the needles. 256 positions: &'static [usize], 257 } 258 259 impl Seed { 260 /// Controls how much we expand the haystack on either side for each test. 261 /// We lower this on Miri because otherwise running the tests would take 262 /// forever. 263 const EXPAND_LEN: usize = { 264 #[cfg(not(miri))] 265 { 266 515 267 } 268 #[cfg(miri)] 269 { 270 6 271 } 272 }; 273 274 /// Expand this test into many variations of the same test. 275 /// 276 /// In particular, this will generate more tests with larger corpus sizes. 277 /// The expected positions are updated to maintain the integrity of the 278 /// test. 279 /// 280 /// This is important in testing a memchr implementation, because there are 281 /// often different cases depending on the length of the corpus. 282 /// 283 /// Note that we extend the corpus by adding `%` bytes, which we 284 /// don't otherwise use as a needle. generate(&self) -> impl Iterator<Item = Test>285 fn generate(&self) -> impl Iterator<Item = Test> { 286 let mut more = vec![]; 287 288 // Add bytes to the start of the corpus. 289 for i in 0..Seed::EXPAND_LEN { 290 let mut t = Test::new(self); 291 let mut new: String = core::iter::repeat('%').take(i).collect(); 292 new.push_str(&t.haystack); 293 t.haystack = new; 294 t.expected = t.expected.into_iter().map(|p| p + i).collect(); 295 more.push(t); 296 } 297 // Add bytes to the end of the corpus. 298 for i in 1..Seed::EXPAND_LEN { 299 let mut t = Test::new(self); 300 let padding: String = core::iter::repeat('%').take(i).collect(); 301 t.haystack.push_str(&padding); 302 more.push(t); 303 } 304 305 more.into_iter() 306 } 307 } 308