1 use alloc::{boxed::Box, vec, vec::Vec}; 2 3 /// A set of "packed pair" test seeds. Each seed serves as the base for the 4 /// generation of many other tests. In essence, the seed captures the pair of 5 /// bytes we used for a predicate and first byte among our needle. The tests 6 /// generated from each seed essentially vary the length of the needle and 7 /// haystack, while using the rare/first byte configuration from the seed. 8 /// 9 /// The purpose of this is to test many different needle/haystack lengths. 10 /// In particular, some of the vector optimizations might only have bugs 11 /// in haystacks of a certain size. 12 const SEEDS: &[Seed] = &[ 13 // Why not use different 'first' bytes? It seemed like a good idea to be 14 // able to configure it, but when I wrote the test generator below, it 15 // didn't seem necessary to use for reasons that I forget. 16 Seed { first: b'x', index1: b'y', index2: b'z' }, 17 Seed { first: b'x', index1: b'x', index2: b'z' }, 18 Seed { first: b'x', index1: b'y', index2: b'x' }, 19 Seed { first: b'x', index1: b'x', index2: b'x' }, 20 Seed { first: b'x', index1: b'y', index2: b'y' }, 21 ]; 22 23 /// Runs a host of "packed pair" search tests. 24 /// 25 /// These tests specifically look for the occurrence of a possible substring 26 /// match based on a pair of bytes matching at the right offsets. 27 pub(crate) struct Runner { 28 fwd: Option< 29 Box< 30 dyn FnMut(&[u8], &[u8], u8, u8) -> Option<Option<usize>> + 'static, 31 >, 32 >, 33 } 34 35 impl Runner { 36 /// Create a new test runner for "packed pair" substring search. new() -> Runner37 pub(crate) fn new() -> Runner { 38 Runner { fwd: None } 39 } 40 41 /// Run all tests. This panics on the first failure. 42 /// 43 /// If the implementation being tested returns `None` for a particular 44 /// haystack/needle combination, then that test is skipped. 45 /// 46 /// This runs tests on both the forward and reverse implementations given. 47 /// If either (or both) are missing, then tests for that implementation are 48 /// skipped. run(self)49 pub(crate) fn run(self) { 50 if let Some(mut fwd) = self.fwd { 51 for seed in SEEDS.iter() { 52 for t in seed.generate() { 53 match fwd(&t.haystack, &t.needle, t.index1, t.index2) { 54 None => continue, 55 Some(result) => { 56 assert_eq!( 57 t.fwd, result, 58 "FORWARD, needle: {:?}, haystack: {:?}, \ 59 index1: {:?}, index2: {:?}", 60 t.needle, t.haystack, t.index1, t.index2, 61 ) 62 } 63 } 64 } 65 } 66 } 67 } 68 69 /// Set the implementation for forward "packed pair" substring search. 70 /// 71 /// If the closure returns `None`, then it is assumed that the given 72 /// test cannot be applied to the particular implementation and it is 73 /// skipped. For example, if a particular implementation only supports 74 /// needles or haystacks for some minimum length. 75 /// 76 /// If this is not set, then forward "packed pair" search is not tested. fwd( mut self, search: impl FnMut(&[u8], &[u8], u8, u8) -> Option<Option<usize>> + 'static, ) -> Runner77 pub(crate) fn fwd( 78 mut self, 79 search: impl FnMut(&[u8], &[u8], u8, u8) -> Option<Option<usize>> + 'static, 80 ) -> Runner { 81 self.fwd = Some(Box::new(search)); 82 self 83 } 84 } 85 86 /// A test that represents the input and expected output to a "packed pair" 87 /// search function. The test should be able to run with any "packed pair" 88 /// implementation and get the expected output. 89 struct Test { 90 haystack: Vec<u8>, 91 needle: Vec<u8>, 92 index1: u8, 93 index2: u8, 94 fwd: Option<usize>, 95 } 96 97 impl Test { 98 /// Create a new "packed pair" test from a seed and some given offsets to 99 /// the pair of bytes to use as a predicate in the seed's needle. 100 /// 101 /// If a valid test could not be constructed, then None is returned. 102 /// (Currently, we take the approach of massaging tests to be valid 103 /// instead of rejecting them outright.) new( seed: Seed, index1: usize, index2: usize, haystack_len: usize, needle_len: usize, fwd: Option<usize>, ) -> Option<Test>104 fn new( 105 seed: Seed, 106 index1: usize, 107 index2: usize, 108 haystack_len: usize, 109 needle_len: usize, 110 fwd: Option<usize>, 111 ) -> Option<Test> { 112 let mut index1: u8 = index1.try_into().unwrap(); 113 let mut index2: u8 = index2.try_into().unwrap(); 114 // The '#' byte is never used in a haystack (unless we're expecting 115 // a match), while the '@' byte is never used in a needle. 116 let mut haystack = vec![b'@'; haystack_len]; 117 let mut needle = vec![b'#'; needle_len]; 118 needle[0] = seed.first; 119 needle[index1 as usize] = seed.index1; 120 needle[index2 as usize] = seed.index2; 121 // If we're expecting a match, then make sure the needle occurs 122 // in the haystack at the expected position. 123 if let Some(i) = fwd { 124 haystack[i..i + needle.len()].copy_from_slice(&needle); 125 } 126 // If the operations above lead to rare offsets pointing to the 127 // non-first occurrence of a byte, then adjust it. This might lead 128 // to redundant tests, but it's simpler than trying to change the 129 // generation process I think. 130 if let Some(i) = crate::memchr(seed.index1, &needle) { 131 index1 = u8::try_from(i).unwrap(); 132 } 133 if let Some(i) = crate::memchr(seed.index2, &needle) { 134 index2 = u8::try_from(i).unwrap(); 135 } 136 Some(Test { haystack, needle, index1, index2, fwd }) 137 } 138 } 139 140 /// Data that describes a single prefilter test seed. 141 #[derive(Clone, Copy)] 142 struct Seed { 143 first: u8, 144 index1: u8, 145 index2: u8, 146 } 147 148 impl Seed { 149 const NEEDLE_LENGTH_LIMIT: usize = { 150 #[cfg(not(miri))] 151 { 152 33 153 } 154 #[cfg(miri)] 155 { 156 5 157 } 158 }; 159 160 const HAYSTACK_LENGTH_LIMIT: usize = { 161 #[cfg(not(miri))] 162 { 163 65 164 } 165 #[cfg(miri)] 166 { 167 8 168 } 169 }; 170 171 /// Generate a series of prefilter tests from this seed. generate(self) -> impl Iterator<Item = Test>172 fn generate(self) -> impl Iterator<Item = Test> { 173 let len_start = 2; 174 // The iterator below generates *a lot* of tests. The number of 175 // tests was chosen somewhat empirically to be "bearable" when 176 // running the test suite. 177 // 178 // We use an iterator here because the collective haystacks of all 179 // these test cases add up to enough memory to OOM a conservative 180 // sandbox or a small laptop. 181 (len_start..=Seed::NEEDLE_LENGTH_LIMIT).flat_map(move |needle_len| { 182 let index_start = len_start - 1; 183 (index_start..needle_len).flat_map(move |index1| { 184 (index1..needle_len).flat_map(move |index2| { 185 (needle_len..=Seed::HAYSTACK_LENGTH_LIMIT).flat_map( 186 move |haystack_len| { 187 Test::new( 188 self, 189 index1, 190 index2, 191 haystack_len, 192 needle_len, 193 None, 194 ) 195 .into_iter() 196 .chain( 197 (0..=(haystack_len - needle_len)).flat_map( 198 move |output| { 199 Test::new( 200 self, 201 index1, 202 index2, 203 haystack_len, 204 needle_len, 205 Some(output), 206 ) 207 }, 208 ), 209 ) 210 }, 211 ) 212 }) 213 }) 214 }) 215 } 216 } 217