1 use crate::UnalignedBuffer;
2 use core::{cmp, hash::Hasher};
3 
4 #[cfg(feature = "serialize")]
5 use serde::{Deserialize, Serialize};
6 
7 const CHUNK_SIZE: usize = 32;
8 
9 pub const PRIME_1: u64 = 11_400_714_785_074_694_791;
10 pub const PRIME_2: u64 = 14_029_467_366_897_019_727;
11 pub const PRIME_3: u64 = 1_609_587_929_392_839_161;
12 pub const PRIME_4: u64 = 9_650_029_242_287_828_579;
13 pub const PRIME_5: u64 = 2_870_177_450_012_600_261;
14 
15 #[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))]
16 #[derive(Copy, Clone, PartialEq)]
17 struct XxCore {
18     v1: u64,
19     v2: u64,
20     v3: u64,
21     v4: u64,
22 }
23 
24 /// Calculates the 64-bit hash.
25 #[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))]
26 #[derive(Debug, Copy, Clone, PartialEq)]
27 pub struct XxHash64 {
28     total_len: u64,
29     seed: u64,
30     core: XxCore,
31     #[cfg_attr(feature = "serialize", serde(flatten))]
32     buffer: Buffer,
33 }
34 
35 impl XxCore {
with_seed(seed: u64) -> XxCore36     fn with_seed(seed: u64) -> XxCore {
37         XxCore {
38             v1: seed.wrapping_add(PRIME_1).wrapping_add(PRIME_2),
39             v2: seed.wrapping_add(PRIME_2),
40             v3: seed,
41             v4: seed.wrapping_sub(PRIME_1),
42         }
43     }
44 
45     #[inline(always)]
ingest_chunks<I>(&mut self, values: I) where I: IntoIterator<Item = [u64; 4]>,46     fn ingest_chunks<I>(&mut self, values: I)
47     where
48         I: IntoIterator<Item = [u64; 4]>,
49     {
50         #[inline(always)]
ingest_one_number(mut current_value: u64, mut value: u64) -> u6451         fn ingest_one_number(mut current_value: u64, mut value: u64) -> u64 {
52             value = value.wrapping_mul(PRIME_2);
53             current_value = current_value.wrapping_add(value);
54             current_value = current_value.rotate_left(31);
55             current_value.wrapping_mul(PRIME_1)
56         }
57 
58         // By drawing these out, we can avoid going back and forth to
59         // memory. It only really helps for large files, when we need
60         // to iterate multiple times here.
61 
62         let mut v1 = self.v1;
63         let mut v2 = self.v2;
64         let mut v3 = self.v3;
65         let mut v4 = self.v4;
66 
67         for [n1, n2, n3, n4] in values {
68             v1 = ingest_one_number(v1, n1.to_le());
69             v2 = ingest_one_number(v2, n2.to_le());
70             v3 = ingest_one_number(v3, n3.to_le());
71             v4 = ingest_one_number(v4, n4.to_le());
72         }
73 
74         self.v1 = v1;
75         self.v2 = v2;
76         self.v3 = v3;
77         self.v4 = v4;
78     }
79 
80     #[inline(always)]
finish(&self) -> u6481     fn finish(&self) -> u64 {
82         // The original code pulls out local vars for v[1234]
83         // here. Performance tests did not show that to be effective
84         // here, presumably because this method is not called in a
85         // tight loop.
86 
87         #[allow(unknown_lints, clippy::needless_late_init)] // keeping things parallel
88         let mut hash;
89 
90         hash = self.v1.rotate_left(1);
91         hash = hash.wrapping_add(self.v2.rotate_left(7));
92         hash = hash.wrapping_add(self.v3.rotate_left(12));
93         hash = hash.wrapping_add(self.v4.rotate_left(18));
94 
95         #[inline(always)]
96         fn mix_one(mut hash: u64, mut value: u64) -> u64 {
97             value = value.wrapping_mul(PRIME_2);
98             value = value.rotate_left(31);
99             value = value.wrapping_mul(PRIME_1);
100             hash ^= value;
101             hash = hash.wrapping_mul(PRIME_1);
102             hash.wrapping_add(PRIME_4)
103         }
104 
105         hash = mix_one(hash, self.v1);
106         hash = mix_one(hash, self.v2);
107         hash = mix_one(hash, self.v3);
108         hash = mix_one(hash, self.v4);
109 
110         hash
111     }
112 }
113 
114 impl core::fmt::Debug for XxCore {
fmt(&self, f: &mut core::fmt::Formatter<'_>) -> Result<(), core::fmt::Error>115     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> Result<(), core::fmt::Error> {
116         write!(
117             f,
118             "XxCore {{ {:016x} {:016x} {:016x} {:016x} }}",
119             self.v1, self.v2, self.v3, self.v4
120         )
121     }
122 }
123 
124 #[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))]
125 #[derive(Debug, Copy, Clone, Default, PartialEq)]
126 #[repr(align(8))]
127 #[cfg_attr(feature = "serialize", serde(transparent))]
128 struct AlignToU64<T>(T);
129 
130 #[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))]
131 #[derive(Debug, Copy, Clone, Default, PartialEq)]
132 struct Buffer {
133     #[cfg_attr(feature = "serialize", serde(rename = "buffer"))]
134     data: AlignToU64<[u8; CHUNK_SIZE]>,
135     #[cfg_attr(feature = "serialize", serde(rename = "buffer_usage"))]
136     len: usize,
137 }
138 
139 impl Buffer {
data(&self) -> &[u8]140     fn data(&self) -> &[u8] {
141         &self.data.0[..self.len]
142     }
143 
144     /// Consumes as much of the parameter as it can, returning the unused part.
consume<'a>(&mut self, data: &'a [u8]) -> &'a [u8]145     fn consume<'a>(&mut self, data: &'a [u8]) -> &'a [u8] {
146         let to_use = cmp::min(self.available(), data.len());
147         let (data, remaining) = data.split_at(to_use);
148         self.data.0[self.len..][..to_use].copy_from_slice(data);
149         self.len += to_use;
150         remaining
151     }
152 
set_data(&mut self, data: &[u8])153     fn set_data(&mut self, data: &[u8]) {
154         debug_assert!(self.is_empty());
155         debug_assert!(data.len() < CHUNK_SIZE);
156         self.data.0[..data.len()].copy_from_slice(data);
157         self.len = data.len();
158     }
159 
available(&self) -> usize160     fn available(&self) -> usize {
161         CHUNK_SIZE - self.len
162     }
163 
is_empty(&self) -> bool164     fn is_empty(&self) -> bool {
165         self.len == 0
166     }
167 
is_full(&self) -> bool168     fn is_full(&self) -> bool {
169         self.len == CHUNK_SIZE
170     }
171 }
172 
173 impl XxHash64 {
174     /// Constructs the hash with an initial seed
with_seed(seed: u64) -> XxHash64175     pub fn with_seed(seed: u64) -> XxHash64 {
176         XxHash64 {
177             total_len: 0,
178             seed,
179             core: XxCore::with_seed(seed),
180             buffer: Buffer::default(),
181         }
182     }
183 
write(&mut self, bytes: &[u8])184     pub(crate) fn write(&mut self, bytes: &[u8]) {
185         let remaining = self.maybe_consume_bytes(bytes);
186         if !remaining.is_empty() {
187             let mut remaining = UnalignedBuffer::new(remaining);
188             self.core.ingest_chunks(&mut remaining);
189             self.buffer.set_data(remaining.remaining());
190         }
191         self.total_len += bytes.len() as u64;
192     }
193 
194     // Consume bytes and try to make `self.buffer` empty.
195     // If there are not enough bytes, `self.buffer` can be non-empty, and this
196     // function returns an empty slice.
maybe_consume_bytes<'a>(&mut self, data: &'a [u8]) -> &'a [u8]197     fn maybe_consume_bytes<'a>(&mut self, data: &'a [u8]) -> &'a [u8] {
198         if self.buffer.is_empty() {
199             data
200         } else {
201             let data = self.buffer.consume(data);
202             if self.buffer.is_full() {
203                 let mut u64s = UnalignedBuffer::new(self.buffer.data());
204                 self.core.ingest_chunks(&mut u64s);
205                 debug_assert!(u64s.remaining().is_empty());
206                 self.buffer.len = 0;
207             }
208             data
209         }
210     }
211 
finish(&self) -> u64212     pub(crate) fn finish(&self) -> u64 {
213         let mut hash = if self.total_len >= CHUNK_SIZE as u64 {
214             // We have processed at least one full chunk
215             self.core.finish()
216         } else {
217             self.seed.wrapping_add(PRIME_5)
218         };
219 
220         hash = hash.wrapping_add(self.total_len);
221 
222         let mut buffered_u64s = UnalignedBuffer::<u64>::new(self.buffer.data());
223         for buffered_u64 in &mut buffered_u64s {
224             let mut k1 = buffered_u64.to_le().wrapping_mul(PRIME_2);
225             k1 = k1.rotate_left(31);
226             k1 = k1.wrapping_mul(PRIME_1);
227             hash ^= k1;
228             hash = hash.rotate_left(27);
229             hash = hash.wrapping_mul(PRIME_1);
230             hash = hash.wrapping_add(PRIME_4);
231         }
232 
233         let mut buffered_u32s = UnalignedBuffer::<u32>::new(buffered_u64s.remaining());
234         for buffered_u32 in &mut buffered_u32s {
235             let k1 = u64::from(buffered_u32.to_le()).wrapping_mul(PRIME_1);
236             hash ^= k1;
237             hash = hash.rotate_left(23);
238             hash = hash.wrapping_mul(PRIME_2);
239             hash = hash.wrapping_add(PRIME_3);
240         }
241 
242         let buffered_u8s = buffered_u32s.remaining();
243         for &buffered_u8 in buffered_u8s {
244             let k1 = u64::from(buffered_u8).wrapping_mul(PRIME_5);
245             hash ^= k1;
246             hash = hash.rotate_left(11);
247             hash = hash.wrapping_mul(PRIME_1);
248         }
249 
250         // The final intermixing
251         hash ^= hash >> 33;
252         hash = hash.wrapping_mul(PRIME_2);
253         hash ^= hash >> 29;
254         hash = hash.wrapping_mul(PRIME_3);
255         hash ^= hash >> 32;
256 
257         hash
258     }
259 
seed(&self) -> u64260     pub fn seed(&self) -> u64 {
261         self.seed
262     }
263 
total_len(&self) -> u64264     pub fn total_len(&self) -> u64 {
265         self.total_len
266     }
267 }
268 
269 impl Default for XxHash64 {
default() -> XxHash64270     fn default() -> XxHash64 {
271         XxHash64::with_seed(0)
272     }
273 }
274 
275 impl Hasher for XxHash64 {
finish(&self) -> u64276     fn finish(&self) -> u64 {
277         XxHash64::finish(self)
278     }
279 
write(&mut self, bytes: &[u8])280     fn write(&mut self, bytes: &[u8]) {
281         XxHash64::write(self, bytes)
282     }
283 }
284 
285 #[cfg(feature = "std")]
286 pub use crate::std_support::sixty_four::RandomXxHashBuilder64;
287 
288 #[cfg(test)]
289 mod test {
290     use super::{RandomXxHashBuilder64, XxHash64};
291     use std::collections::HashMap;
292     use std::hash::BuildHasherDefault;
293     use std::prelude::v1::*;
294 
295     #[test]
ingesting_byte_by_byte_is_equivalent_to_large_chunks()296     fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() {
297         let bytes: Vec<_> = (0..32).map(|_| 0).collect();
298 
299         let mut byte_by_byte = XxHash64::with_seed(0);
300         for byte in bytes.chunks(1) {
301             byte_by_byte.write(byte);
302         }
303 
304         let mut one_chunk = XxHash64::with_seed(0);
305         one_chunk.write(&bytes);
306 
307         assert_eq!(byte_by_byte.core, one_chunk.core);
308     }
309 
310     #[test]
hash_of_nothing_matches_c_implementation()311     fn hash_of_nothing_matches_c_implementation() {
312         let mut hasher = XxHash64::with_seed(0);
313         hasher.write(&[]);
314         assert_eq!(hasher.finish(), 0xef46_db37_51d8_e999);
315     }
316 
317     #[test]
hash_of_single_byte_matches_c_implementation()318     fn hash_of_single_byte_matches_c_implementation() {
319         let mut hasher = XxHash64::with_seed(0);
320         hasher.write(&[42]);
321         assert_eq!(hasher.finish(), 0x0a9e_dece_beb0_3ae4);
322     }
323 
324     #[test]
hash_of_multiple_bytes_matches_c_implementation()325     fn hash_of_multiple_bytes_matches_c_implementation() {
326         let mut hasher = XxHash64::with_seed(0);
327         hasher.write(b"Hello, world!\0");
328         assert_eq!(hasher.finish(), 0x7b06_c531_ea43_e89f);
329     }
330 
331     #[test]
hash_of_multiple_chunks_matches_c_implementation()332     fn hash_of_multiple_chunks_matches_c_implementation() {
333         let bytes: Vec<_> = (0..100).collect();
334         let mut hasher = XxHash64::with_seed(0);
335         hasher.write(&bytes);
336         assert_eq!(hasher.finish(), 0x6ac1_e580_3216_6597);
337     }
338 
339     #[test]
hash_with_different_seed_matches_c_implementation()340     fn hash_with_different_seed_matches_c_implementation() {
341         let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91);
342         hasher.write(&[]);
343         assert_eq!(hasher.finish(), 0x4b6a_04fc_df7a_4672);
344     }
345 
346     #[test]
hash_with_different_seed_and_multiple_chunks_matches_c_implementation()347     fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation() {
348         let bytes: Vec<_> = (0..100).collect();
349         let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91);
350         hasher.write(&bytes);
351         assert_eq!(hasher.finish(), 0x567e_355e_0682_e1f1);
352     }
353 
354     #[test]
can_be_used_in_a_hashmap_with_a_default_seed()355     fn can_be_used_in_a_hashmap_with_a_default_seed() {
356         let mut hash: HashMap<_, _, BuildHasherDefault<XxHash64>> = Default::default();
357         hash.insert(42, "the answer");
358         assert_eq!(hash.get(&42), Some(&"the answer"));
359     }
360 
361     #[test]
can_be_used_in_a_hashmap_with_a_random_seed()362     fn can_be_used_in_a_hashmap_with_a_random_seed() {
363         let mut hash: HashMap<_, _, RandomXxHashBuilder64> = Default::default();
364         hash.insert(42, "the answer");
365         assert_eq!(hash.get(&42), Some(&"the answer"));
366     }
367 
368     #[cfg(feature = "serialize")]
369     type TestResult<T = ()> = Result<T, Box<dyn std::error::Error>>;
370 
371     #[cfg(feature = "serialize")]
372     #[test]
test_serialization_cycle() -> TestResult373     fn test_serialization_cycle() -> TestResult {
374         let mut hasher = XxHash64::with_seed(0);
375         hasher.write(b"Hello, world!\0");
376         hasher.finish();
377 
378         let serialized = serde_json::to_string(&hasher)?;
379         let unserialized: XxHash64 = serde_json::from_str(&serialized)?;
380         assert_eq!(hasher, unserialized);
381         Ok(())
382     }
383 
384     #[cfg(feature = "serialize")]
385     #[test]
test_serialization_stability() -> TestResult386     fn test_serialization_stability() -> TestResult {
387         let mut hasher = XxHash64::with_seed(0);
388         hasher.write(b"Hello, world!\0");
389         hasher.finish();
390 
391         let serialized = r#"{
392             "total_len": 14,
393             "seed": 0,
394             "core": {
395               "v1": 6983438078262162902,
396               "v2": 14029467366897019727,
397               "v3": 0,
398               "v4": 7046029288634856825
399             },
400             "buffer": [
401               72,  101, 108, 108, 111, 44, 32, 119,
402               111, 114, 108, 100, 33,  0,  0,  0,
403               0,   0,   0,   0,   0,   0,  0,  0,
404               0,   0,   0,   0,   0,   0,  0,  0
405             ],
406             "buffer_usage": 14
407         }"#;
408 
409         let unserialized: XxHash64 = serde_json::from_str(serialized).unwrap();
410         assert_eq!(hasher, unserialized);
411         Ok(())
412     }
413 }
414