use crate::convert::*;
use crate::operations::folded_multiply;
use crate::operations::read_small;
use crate::operations::MULTIPLE;
use crate::random_state::PI;
use crate::RandomState;
use core::hash::Hasher;

const ROT: u32 = 23; //17

/// A `Hasher` for hashing an arbitrary stream of bytes.
///
/// Instances of [`AHasher`] represent state that is updated while hashing data.
///
/// Each method updates the internal state based on the new data provided. Once
/// all of the data has been provided, the resulting hash can be obtained by calling
/// `finish()`.
///
/// [Clone] is also provided in case you wish to calculate hashes for two different items that
/// start with the same data.
///
#[derive(Debug, Clone)]
pub struct AHasher {
    buffer: u64,
    pad: u64,
    extra_keys: [u64; 2],
}

impl AHasher {
    /// Creates a new hasher keyed to the provided key.
    #[inline]
    #[allow(dead_code)] // Is not called if non-fallback hash is used.
    pub(crate) fn new_with_keys(key1: u128, key2: u128) -> AHasher {
        let pi: [u128; 2] = PI.convert();
        let key1: [u64; 2] = (key1 ^ pi[0]).convert();
        let key2: [u64; 2] = (key2 ^ pi[1]).convert();
        AHasher {
            buffer: key1[0],
            pad: key1[1],
            extra_keys: key2,
        }
    }

    #[allow(unused)] // False positive
    pub(crate) fn test_with_keys(key1: u128, key2: u128) -> Self {
        let key1: [u64; 2] = key1.convert();
        let key2: [u64; 2] = key2.convert();
        Self {
            buffer: key1[0],
            pad: key1[1],
            extra_keys: key2,
        }
    }

    #[inline]
    #[allow(dead_code)] // Is not called if non-fallback hash is used.
    pub(crate) fn from_random_state(rand_state: &RandomState) -> AHasher {
        AHasher {
            buffer: rand_state.k1,
            pad: rand_state.k0,
            extra_keys: [rand_state.k2, rand_state.k3],
        }
    }

    /// This update function has the goal of updating the buffer with a single multiply.
    /// FxHash does this but is vulnerable to attack. To avoid this, the input needs to be masked with an
    /// unpredictable value. Other hashes such as MurmurHash have taken this approach but were found vulnerable
    /// to attack. The attack was based on the idea of reversing the pre-mixing (which is necessarily
    /// reversible, otherwise bits would be lost), then placing a difference in the highest bit before the
    /// multiply used to mix the data. Because a multiply can never affect the bits to the right of it, a
    /// subsequent update that also differed in this bit could result in a predictable collision.
    ///
    /// This version avoids this vulnerability while still only using a single multiply. It takes advantage
    /// of the fact that when a 64 bit multiply is performed the upper 64 bits are usually computed and thrown
    /// away. Instead it creates two 128 bit values where the upper 64 bits are zeros and multiplies them.
    /// (The compiler is smart enough to turn this into a 64 bit multiplication in the assembly.)
    /// Then the upper bits are xored with the lower bits to produce a single 64 bit result.
    ///
    /// To understand why this is a good scrambling function it helps to understand multiply-with-carry PRNGs:
    /// https://en.wikipedia.org/wiki/Multiply-with-carry_pseudorandom_number_generator
    /// If the multiple is chosen well, this creates a long period, decent quality PRNG.
    /// Notice that this function is equivalent to that except the `buffer`/`state` is xored with each
    /// new block of data. In the event that the data is all zeros, it is exactly equivalent to a MWC PRNG.
    ///
    /// This is impervious to attack because every bit of the buffer at the end is dependent on every bit in
    /// `new_data ^ buffer`. For example, suppose two inputs differed in only the 5th bit. Then when the
    /// multiplication is performed the `result` will differ in bits 5-69. More specifically it will differ by
    /// 2^5 * MULTIPLE. However, in the next step bits 64-127 are split off into a separate 64 bit value, so
    /// the differing bits end up in the lower 6 bits of that value. The two intermediate values, which differ
    /// in bits 5-63 and in bits 0-5 respectively, are then combined, producing an output that can differ in
    /// every bit. The carries inside the multiplication additionally mean that even if an attacker somehow
    /// knew part of (but not all of) the contents of the buffer beforehand,
    /// they would not be able to predict any of the bits in the buffer at the end.
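    ///
    /// As a sketch, the folding works roughly like this (the real implementation lives in
    /// `operations::folded_multiply` and may differ in details):
    /// ```ignore
    /// fn folded_multiply_sketch(s: u64, by: u64) -> u64 {
    ///     // Widen to 128 bits so the upper half of the product is retained...
    ///     let result = (s as u128).wrapping_mul(by as u128);
    ///     // ...then fold the upper half into the lower half so every input bit
    ///     // can influence the output.
    ///     ((result & 0xffff_ffff_ffff_ffff) as u64) ^ ((result >> 64) as u64)
    /// }
    /// ```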
    #[inline(always)]
    fn update(&mut self, new_data: u64) {
        self.buffer = folded_multiply(new_data ^ self.buffer, MULTIPLE);
    }

    /// Similar to the above, this function performs an update using a "folded multiply".
    /// However it takes in 128 bits of data instead of 64. Both halves must be masked.
    ///
    /// This makes it impossible for an attacker to place a single bit difference between
    /// two blocks so as to cancel each other.
    ///
    /// However this alone is not sufficient: to prevent (a, b) from hashing the same as (b, a), the buffer
    /// itself must be updated between calls in a way that does not commute. To achieve this, xor and rotate
    /// are used. Add followed by xor is not the same as xor followed by add, and the rotate ensures that the
    /// same output bits can't be changed by the same set of input bits. To cancel this sequence with
    /// subsequent input would require knowing the keys.
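    ///
    /// A small illustration of the non-commutative combine (hypothetical values; zero keys
    /// used only for readability):
    /// ```ignore
    /// let mut ab = AHasher::test_with_keys(0, 0);
    /// ab.large_update((1u128 << 64) | 2);
    /// ab.large_update((3u128 << 64) | 4);
    /// let mut ba = AHasher::test_with_keys(0, 0);
    /// ba.large_update((3u128 << 64) | 4);
    /// ba.large_update((1u128 << 64) | 2);
    /// // The xor + rotate between calls means the two states differ.
    /// assert_ne!(ab.buffer, ba.buffer);
    /// ```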
    #[inline(always)]
    fn large_update(&mut self, new_data: u128) {
        let block: [u64; 2] = new_data.convert();
        let combined = folded_multiply(block[0] ^ self.extra_keys[0], block[1] ^ self.extra_keys[1]);
        self.buffer = (self.buffer.wrapping_add(self.pad) ^ combined).rotate_left(ROT);
    }

    #[inline]
    #[cfg(feature = "specialize")]
    fn short_finish(&self) -> u64 {
        folded_multiply(self.buffer, self.pad)
    }
}

/// Provides [Hasher] methods to hash all of the primitive types.
///
/// [Hasher]: core::hash::Hasher
impl Hasher for AHasher {
    #[inline]
    fn write_u8(&mut self, i: u8) {
        self.update(i as u64);
    }

    #[inline]
    fn write_u16(&mut self, i: u16) {
        self.update(i as u64);
    }

    #[inline]
    fn write_u32(&mut self, i: u32) {
        self.update(i as u64);
    }

    #[inline]
    fn write_u64(&mut self, i: u64) {
        self.update(i);
    }

    #[inline]
    fn write_u128(&mut self, i: u128) {
        self.large_update(i);
    }

    #[inline]
    #[cfg(any(
        target_pointer_width = "64",
        target_pointer_width = "32",
        target_pointer_width = "16"
    ))]
    fn write_usize(&mut self, i: usize) {
        self.write_u64(i as u64);
    }

    #[inline]
    #[cfg(target_pointer_width = "128")]
    fn write_usize(&mut self, i: usize) {
        self.write_u128(i as u128);
    }
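
    /// Hashes an arbitrary length byte slice.
    ///
    /// A sketch of why the length is added in before any bytes are read
    /// (hypothetical inputs; `read_small` pads short slices, so two all-zero
    /// slices of different lengths read back identical words):
    /// ```ignore
    /// let mut a = AHasher::new_with_keys(0, 0);
    /// a.write(&[0u8; 4]);
    /// let mut b = AHasher::new_with_keys(0, 0);
    /// b.write(&[0u8; 8]);
    /// // Only the length mixed into the buffer keeps these apart.
    /// assert_ne!(a.finish(), b.finish());
    /// ```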
    #[inline]
    #[allow(clippy::collapsible_if)]
    fn write(&mut self, input: &[u8]) {
        let mut data = input;
        let length = data.len() as u64;
        // Needs to be an add rather than an xor because otherwise it could be canceled with carefully formed input.
        self.buffer = self.buffer.wrapping_add(length).wrapping_mul(MULTIPLE);
        // A 'binary search' on sizes reduces the number of comparisons.
        if data.len() > 8 {
            if data.len() > 16 {
                let tail = data.read_last_u128();
                self.large_update(tail);
                while data.len() > 16 {
                    let (block, rest) = data.read_u128();
                    self.large_update(block);
                    data = rest;
                }
            } else {
                self.large_update([data.read_u64().0, data.read_last_u64()].convert());
            }
        } else {
            let value = read_small(data);
            self.large_update(value.convert());
        }
    }

    #[inline]
    fn finish(&self) -> u64 {
        let rot = (self.buffer & 63) as u32;
        folded_multiply(self.buffer, self.pad).rotate_left(rot)
    }
}

#[cfg(feature = "specialize")]
pub(crate) struct AHasherU64 {
    pub(crate) buffer: u64,
    pub(crate) pad: u64,
}

/// A specialized hasher for only primitives under 64 bits.
#[cfg(feature = "specialize")]
impl Hasher for AHasherU64 {
    #[inline]
    fn finish(&self) -> u64 {
        folded_multiply(self.buffer, self.pad)
        //self.buffer
    }

    #[inline]
    fn write(&mut self, _bytes: &[u8]) {
        unreachable!("Specialized hasher was called with a different type of object")
    }

    #[inline]
    fn write_u8(&mut self, i: u8) {
        self.write_u64(i as u64);
    }

    #[inline]
    fn write_u16(&mut self, i: u16) {
        self.write_u64(i as u64);
    }

    #[inline]
    fn write_u32(&mut self, i: u32) {
        self.write_u64(i as u64);
    }

    #[inline]
    fn write_u64(&mut self, i: u64) {
        self.buffer = folded_multiply(i ^ self.buffer, MULTIPLE);
    }

    #[inline]
    fn write_u128(&mut self, _i: u128) {
        unreachable!("Specialized hasher was called with a different type of object")
    }

    #[inline]
    fn write_usize(&mut self, _i: usize) {
        unreachable!("Specialized hasher was called with a different type of object")
    }
}

#[cfg(feature = "specialize")]
pub(crate) struct AHasherFixed(pub AHasher);

/// A specialized hasher for fixed size primitives larger than 64 bits.
#[cfg(feature = "specialize")]
impl Hasher for AHasherFixed {
    #[inline]
    fn finish(&self) -> u64 {
        self.0.short_finish()
    }

    #[inline]
    fn write(&mut self, bytes: &[u8]) {
        self.0.write(bytes)
    }

    #[inline]
    fn write_u8(&mut self, i: u8) {
        self.write_u64(i as u64);
    }

    #[inline]
    fn write_u16(&mut self, i: u16) {
        self.write_u64(i as u64);
    }

    #[inline]
    fn write_u32(&mut self, i: u32) {
        self.write_u64(i as u64);
    }

    #[inline]
    fn write_u64(&mut self, i: u64) {
        self.0.write_u64(i);
    }

    #[inline]
    fn write_u128(&mut self, i: u128) {
        self.0.write_u128(i);
    }

    #[inline]
    fn write_usize(&mut self, i: usize) {
        self.0.write_usize(i);
    }
}

#[cfg(feature = "specialize")]
pub(crate) struct AHasherStr(pub AHasher);

/// A specialized hasher for a single string.
/// Note that the other types don't panic because the hash impl for `String` tacks on an unneeded call.
/// (As does `Vec`.)
#[cfg(feature = "specialize")]
impl Hasher for AHasherStr {
    #[inline]
    fn finish(&self) -> u64 {
        self.0.finish()
    }

    #[inline]
    fn write(&mut self, bytes: &[u8]) {
        if bytes.len() > 8 {
            self.0.write(bytes)
        } else {
            let value = read_small(bytes);
            self.0.buffer = folded_multiply(value[0] ^ self.0.buffer, value[1] ^ self.0.extra_keys[1]);
            self.0.pad = self.0.pad.wrapping_add(bytes.len() as u64);
        }
    }

    #[inline]
    fn write_u8(&mut self, _i: u8) {}

    #[inline]
    fn write_u16(&mut self, _i: u16) {}

    #[inline]
    fn write_u32(&mut self, _i: u32) {}

    #[inline]
    fn write_u64(&mut self, _i: u64) {}

    #[inline]
    fn write_u128(&mut self, _i: u128) {}

    #[inline]
    fn write_usize(&mut self, _i: usize) {}
}

#[cfg(test)]
mod tests {
    use crate::fallback_hash::*;
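    use core::hash::Hasher;

    // An illustrative check (mirroring the comment in `write`): all-zero inputs of
    // different lengths read back identical words from `read_small`, so only the
    // length added into the buffer keeps their hashes apart.
    #[test]
    fn test_length_affects_hash() {
        let mut short = AHasher::new_with_keys(0, 0);
        short.write(&[0u8; 4]);
        let mut long = AHasher::new_with_keys(0, 0);
        long.write(&[0u8; 8]);
        assert_ne!(short.finish(), long.finish());
    }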

    #[test]
    fn test_hash() {
        let mut hasher = AHasher::new_with_keys(0, 0);
        let value: u64 = 1 << 32;
        hasher.update(value);
        let result = hasher.buffer;
        let mut hasher = AHasher::new_with_keys(0, 0);
        let value2: u64 = 1;
        hasher.update(value2);
        let result2 = hasher.buffer;
        let result: [u8; 8] = result.convert();
        let result2: [u8; 8] = result2.convert();
        assert_ne!(hex::encode(result), hex::encode(result2));
    }

    #[test]
    fn test_conversion() {
        let input: &[u8] = "dddddddd".as_bytes();
        let bytes: u64 = as_array!(input, 8).convert();
        assert_eq!(bytes, 0x6464646464646464);
    }
}