1 use crate::UnalignedBuffer; 2 use core::{cmp, hash::Hasher}; 3 4 #[cfg(feature = "serialize")] 5 use serde::{Deserialize, Serialize}; 6 7 const CHUNK_SIZE: usize = 32; 8 9 pub const PRIME_1: u64 = 11_400_714_785_074_694_791; 10 pub const PRIME_2: u64 = 14_029_467_366_897_019_727; 11 pub const PRIME_3: u64 = 1_609_587_929_392_839_161; 12 pub const PRIME_4: u64 = 9_650_029_242_287_828_579; 13 pub const PRIME_5: u64 = 2_870_177_450_012_600_261; 14 15 #[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] 16 #[derive(Copy, Clone, PartialEq)] 17 struct XxCore { 18 v1: u64, 19 v2: u64, 20 v3: u64, 21 v4: u64, 22 } 23 24 /// Calculates the 64-bit hash. 25 #[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] 26 #[derive(Debug, Copy, Clone, PartialEq)] 27 pub struct XxHash64 { 28 total_len: u64, 29 seed: u64, 30 core: XxCore, 31 #[cfg_attr(feature = "serialize", serde(flatten))] 32 buffer: Buffer, 33 } 34 35 impl XxCore { with_seed(seed: u64) -> XxCore36 fn with_seed(seed: u64) -> XxCore { 37 XxCore { 38 v1: seed.wrapping_add(PRIME_1).wrapping_add(PRIME_2), 39 v2: seed.wrapping_add(PRIME_2), 40 v3: seed, 41 v4: seed.wrapping_sub(PRIME_1), 42 } 43 } 44 45 #[inline(always)] ingest_chunks<I>(&mut self, values: I) where I: IntoIterator<Item = [u64; 4]>,46 fn ingest_chunks<I>(&mut self, values: I) 47 where 48 I: IntoIterator<Item = [u64; 4]>, 49 { 50 #[inline(always)] ingest_one_number(mut current_value: u64, mut value: u64) -> u6451 fn ingest_one_number(mut current_value: u64, mut value: u64) -> u64 { 52 value = value.wrapping_mul(PRIME_2); 53 current_value = current_value.wrapping_add(value); 54 current_value = current_value.rotate_left(31); 55 current_value.wrapping_mul(PRIME_1) 56 } 57 58 // By drawing these out, we can avoid going back and forth to 59 // memory. It only really helps for large files, when we need 60 // to iterate multiple times here. 61 62 let mut v1 = self.v1; 63 let mut v2 = self.v2; 64 let mut v3 = self.v3; 65 let mut v4 = self.v4; 66 67 for [n1, n2, n3, n4] in values { 68 v1 = ingest_one_number(v1, n1.to_le()); 69 v2 = ingest_one_number(v2, n2.to_le()); 70 v3 = ingest_one_number(v3, n3.to_le()); 71 v4 = ingest_one_number(v4, n4.to_le()); 72 } 73 74 self.v1 = v1; 75 self.v2 = v2; 76 self.v3 = v3; 77 self.v4 = v4; 78 } 79 80 #[inline(always)] finish(&self) -> u6481 fn finish(&self) -> u64 { 82 // The original code pulls out local vars for v[1234] 83 // here. Performance tests did not show that to be effective 84 // here, presumably because this method is not called in a 85 // tight loop. 86 87 #[allow(unknown_lints, clippy::needless_late_init)] // keeping things parallel 88 let mut hash; 89 90 hash = self.v1.rotate_left(1); 91 hash = hash.wrapping_add(self.v2.rotate_left(7)); 92 hash = hash.wrapping_add(self.v3.rotate_left(12)); 93 hash = hash.wrapping_add(self.v4.rotate_left(18)); 94 95 #[inline(always)] 96 fn mix_one(mut hash: u64, mut value: u64) -> u64 { 97 value = value.wrapping_mul(PRIME_2); 98 value = value.rotate_left(31); 99 value = value.wrapping_mul(PRIME_1); 100 hash ^= value; 101 hash = hash.wrapping_mul(PRIME_1); 102 hash.wrapping_add(PRIME_4) 103 } 104 105 hash = mix_one(hash, self.v1); 106 hash = mix_one(hash, self.v2); 107 hash = mix_one(hash, self.v3); 108 hash = mix_one(hash, self.v4); 109 110 hash 111 } 112 } 113 114 impl core::fmt::Debug for XxCore { fmt(&self, f: &mut core::fmt::Formatter<'_>) -> Result<(), core::fmt::Error>115 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> Result<(), core::fmt::Error> { 116 write!( 117 f, 118 "XxCore {{ {:016x} {:016x} {:016x} {:016x} }}", 119 self.v1, self.v2, self.v3, self.v4 120 ) 121 } 122 } 123 124 #[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))] 125 #[derive(Debug, Copy, Clone, Default, PartialEq)] 126 #[repr(align(8))] 127 #[cfg_attr(feature = "serialize", serde(transparent))] 128 struct AlignToU64<T>(T); 129 130 #[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))] 131 #[derive(Debug, Copy, Clone, Default, PartialEq)] 132 struct Buffer { 133 #[cfg_attr(feature = "serialize", serde(rename = "buffer"))] 134 data: AlignToU64<[u8; CHUNK_SIZE]>, 135 #[cfg_attr(feature = "serialize", serde(rename = "buffer_usage"))] 136 len: usize, 137 } 138 139 impl Buffer { data(&self) -> &[u8]140 fn data(&self) -> &[u8] { 141 &self.data.0[..self.len] 142 } 143 144 /// Consumes as much of the parameter as it can, returning the unused part. consume<'a>(&mut self, data: &'a [u8]) -> &'a [u8]145 fn consume<'a>(&mut self, data: &'a [u8]) -> &'a [u8] { 146 let to_use = cmp::min(self.available(), data.len()); 147 let (data, remaining) = data.split_at(to_use); 148 self.data.0[self.len..][..to_use].copy_from_slice(data); 149 self.len += to_use; 150 remaining 151 } 152 set_data(&mut self, data: &[u8])153 fn set_data(&mut self, data: &[u8]) { 154 debug_assert!(self.is_empty()); 155 debug_assert!(data.len() < CHUNK_SIZE); 156 self.data.0[..data.len()].copy_from_slice(data); 157 self.len = data.len(); 158 } 159 available(&self) -> usize160 fn available(&self) -> usize { 161 CHUNK_SIZE - self.len 162 } 163 is_empty(&self) -> bool164 fn is_empty(&self) -> bool { 165 self.len == 0 166 } 167 is_full(&self) -> bool168 fn is_full(&self) -> bool { 169 self.len == CHUNK_SIZE 170 } 171 } 172 173 impl XxHash64 { 174 /// Constructs the hash with an initial seed with_seed(seed: u64) -> XxHash64175 pub fn with_seed(seed: u64) -> XxHash64 { 176 XxHash64 { 177 total_len: 0, 178 seed, 179 core: XxCore::with_seed(seed), 180 buffer: Buffer::default(), 181 } 182 } 183 write(&mut self, bytes: &[u8])184 pub(crate) fn write(&mut self, bytes: &[u8]) { 185 let remaining = self.maybe_consume_bytes(bytes); 186 if !remaining.is_empty() { 187 let mut remaining = UnalignedBuffer::new(remaining); 188 self.core.ingest_chunks(&mut remaining); 189 self.buffer.set_data(remaining.remaining()); 190 } 191 self.total_len += bytes.len() as u64; 192 } 193 194 // Consume bytes and try to make `self.buffer` empty. 195 // If there are not enough bytes, `self.buffer` can be non-empty, and this 196 // function returns an empty slice. maybe_consume_bytes<'a>(&mut self, data: &'a [u8]) -> &'a [u8]197 fn maybe_consume_bytes<'a>(&mut self, data: &'a [u8]) -> &'a [u8] { 198 if self.buffer.is_empty() { 199 data 200 } else { 201 let data = self.buffer.consume(data); 202 if self.buffer.is_full() { 203 let mut u64s = UnalignedBuffer::new(self.buffer.data()); 204 self.core.ingest_chunks(&mut u64s); 205 debug_assert!(u64s.remaining().is_empty()); 206 self.buffer.len = 0; 207 } 208 data 209 } 210 } 211 finish(&self) -> u64212 pub(crate) fn finish(&self) -> u64 { 213 let mut hash = if self.total_len >= CHUNK_SIZE as u64 { 214 // We have processed at least one full chunk 215 self.core.finish() 216 } else { 217 self.seed.wrapping_add(PRIME_5) 218 }; 219 220 hash = hash.wrapping_add(self.total_len); 221 222 let mut buffered_u64s = UnalignedBuffer::<u64>::new(self.buffer.data()); 223 for buffered_u64 in &mut buffered_u64s { 224 let mut k1 = buffered_u64.to_le().wrapping_mul(PRIME_2); 225 k1 = k1.rotate_left(31); 226 k1 = k1.wrapping_mul(PRIME_1); 227 hash ^= k1; 228 hash = hash.rotate_left(27); 229 hash = hash.wrapping_mul(PRIME_1); 230 hash = hash.wrapping_add(PRIME_4); 231 } 232 233 let mut buffered_u32s = UnalignedBuffer::<u32>::new(buffered_u64s.remaining()); 234 for buffered_u32 in &mut buffered_u32s { 235 let k1 = u64::from(buffered_u32.to_le()).wrapping_mul(PRIME_1); 236 hash ^= k1; 237 hash = hash.rotate_left(23); 238 hash = hash.wrapping_mul(PRIME_2); 239 hash = hash.wrapping_add(PRIME_3); 240 } 241 242 let buffered_u8s = buffered_u32s.remaining(); 243 for &buffered_u8 in buffered_u8s { 244 let k1 = u64::from(buffered_u8).wrapping_mul(PRIME_5); 245 hash ^= k1; 246 hash = hash.rotate_left(11); 247 hash = hash.wrapping_mul(PRIME_1); 248 } 249 250 // The final intermixing 251 hash ^= hash >> 33; 252 hash = hash.wrapping_mul(PRIME_2); 253 hash ^= hash >> 29; 254 hash = hash.wrapping_mul(PRIME_3); 255 hash ^= hash >> 32; 256 257 hash 258 } 259 seed(&self) -> u64260 pub fn seed(&self) -> u64 { 261 self.seed 262 } 263 total_len(&self) -> u64264 pub fn total_len(&self) -> u64 { 265 self.total_len 266 } 267 } 268 269 impl Default for XxHash64 { default() -> XxHash64270 fn default() -> XxHash64 { 271 XxHash64::with_seed(0) 272 } 273 } 274 275 impl Hasher for XxHash64 { finish(&self) -> u64276 fn finish(&self) -> u64 { 277 XxHash64::finish(self) 278 } 279 write(&mut self, bytes: &[u8])280 fn write(&mut self, bytes: &[u8]) { 281 XxHash64::write(self, bytes) 282 } 283 } 284 285 #[cfg(feature = "std")] 286 pub use crate::std_support::sixty_four::RandomXxHashBuilder64; 287 288 #[cfg(test)] 289 mod test { 290 use super::{RandomXxHashBuilder64, XxHash64}; 291 use std::collections::HashMap; 292 use std::hash::BuildHasherDefault; 293 use std::prelude::v1::*; 294 295 #[test] ingesting_byte_by_byte_is_equivalent_to_large_chunks()296 fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() { 297 let bytes: Vec<_> = (0..32).map(|_| 0).collect(); 298 299 let mut byte_by_byte = XxHash64::with_seed(0); 300 for byte in bytes.chunks(1) { 301 byte_by_byte.write(byte); 302 } 303 304 let mut one_chunk = XxHash64::with_seed(0); 305 one_chunk.write(&bytes); 306 307 assert_eq!(byte_by_byte.core, one_chunk.core); 308 } 309 310 #[test] hash_of_nothing_matches_c_implementation()311 fn hash_of_nothing_matches_c_implementation() { 312 let mut hasher = XxHash64::with_seed(0); 313 hasher.write(&[]); 314 assert_eq!(hasher.finish(), 0xef46_db37_51d8_e999); 315 } 316 317 #[test] hash_of_single_byte_matches_c_implementation()318 fn hash_of_single_byte_matches_c_implementation() { 319 let mut hasher = XxHash64::with_seed(0); 320 hasher.write(&[42]); 321 assert_eq!(hasher.finish(), 0x0a9e_dece_beb0_3ae4); 322 } 323 324 #[test] hash_of_multiple_bytes_matches_c_implementation()325 fn hash_of_multiple_bytes_matches_c_implementation() { 326 let mut hasher = XxHash64::with_seed(0); 327 hasher.write(b"Hello, world!\0"); 328 assert_eq!(hasher.finish(), 0x7b06_c531_ea43_e89f); 329 } 330 331 #[test] hash_of_multiple_chunks_matches_c_implementation()332 fn hash_of_multiple_chunks_matches_c_implementation() { 333 let bytes: Vec<_> = (0..100).collect(); 334 let mut hasher = XxHash64::with_seed(0); 335 hasher.write(&bytes); 336 assert_eq!(hasher.finish(), 0x6ac1_e580_3216_6597); 337 } 338 339 #[test] hash_with_different_seed_matches_c_implementation()340 fn hash_with_different_seed_matches_c_implementation() { 341 let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); 342 hasher.write(&[]); 343 assert_eq!(hasher.finish(), 0x4b6a_04fc_df7a_4672); 344 } 345 346 #[test] hash_with_different_seed_and_multiple_chunks_matches_c_implementation()347 fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation() { 348 let bytes: Vec<_> = (0..100).collect(); 349 let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); 350 hasher.write(&bytes); 351 assert_eq!(hasher.finish(), 0x567e_355e_0682_e1f1); 352 } 353 354 #[test] can_be_used_in_a_hashmap_with_a_default_seed()355 fn can_be_used_in_a_hashmap_with_a_default_seed() { 356 let mut hash: HashMap<_, _, BuildHasherDefault<XxHash64>> = Default::default(); 357 hash.insert(42, "the answer"); 358 assert_eq!(hash.get(&42), Some(&"the answer")); 359 } 360 361 #[test] can_be_used_in_a_hashmap_with_a_random_seed()362 fn can_be_used_in_a_hashmap_with_a_random_seed() { 363 let mut hash: HashMap<_, _, RandomXxHashBuilder64> = Default::default(); 364 hash.insert(42, "the answer"); 365 assert_eq!(hash.get(&42), Some(&"the answer")); 366 } 367 368 #[cfg(feature = "serialize")] 369 type TestResult<T = ()> = Result<T, Box<dyn std::error::Error>>; 370 371 #[cfg(feature = "serialize")] 372 #[test] test_serialization_cycle() -> TestResult373 fn test_serialization_cycle() -> TestResult { 374 let mut hasher = XxHash64::with_seed(0); 375 hasher.write(b"Hello, world!\0"); 376 hasher.finish(); 377 378 let serialized = serde_json::to_string(&hasher)?; 379 let unserialized: XxHash64 = serde_json::from_str(&serialized)?; 380 assert_eq!(hasher, unserialized); 381 Ok(()) 382 } 383 384 #[cfg(feature = "serialize")] 385 #[test] test_serialization_stability() -> TestResult386 fn test_serialization_stability() -> TestResult { 387 let mut hasher = XxHash64::with_seed(0); 388 hasher.write(b"Hello, world!\0"); 389 hasher.finish(); 390 391 let serialized = r#"{ 392 "total_len": 14, 393 "seed": 0, 394 "core": { 395 "v1": 6983438078262162902, 396 "v2": 14029467366897019727, 397 "v3": 0, 398 "v4": 7046029288634856825 399 }, 400 "buffer": [ 401 72, 101, 108, 108, 111, 44, 32, 119, 402 111, 114, 108, 100, 33, 0, 0, 0, 403 0, 0, 0, 0, 0, 0, 0, 0, 404 0, 0, 0, 0, 0, 0, 0, 0 405 ], 406 "buffer_usage": 14 407 }"#; 408 409 let unserialized: XxHash64 = serde_json::from_str(serialized).unwrap(); 410 assert_eq!(hasher, unserialized); 411 Ok(()) 412 } 413 } 414