xref: /aosp_15_r20/external/cronet/base/third_party/cityhash_v103/src/city_v103.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright (c) 2011 Google, Inc.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 //
21 // CityHash, by Geoff Pike and Jyrki Alakuijala
22 //
23 // This file provides CityHash64() and related functions.
24 //
25 // It's probably possible to create even faster hash functions by
26 // writing a program that systematically explores some of the space of
27 // possible hash functions, by using SIMD instructions, or by
28 // compromising on hash quality.
29 
30 #include "base/third_party/cityhash_v103/src/city_v103.h"
31 
32 #include <string.h>  // for memcpy and memset
33 #include <algorithm>
34 
35 #include "base/compiler_specific.h"
36 #include "build/build_config.h"
37 
38 namespace base {
39 namespace internal {
40 namespace cityhash_v103 {
41 
UNALIGNED_LOAD64(const char * p)42 static uint64 UNALIGNED_LOAD64(const char* p) {
43   uint64 result;
44   memcpy(&result, p, sizeof(result));
45   return result;
46 }
47 
UNALIGNED_LOAD32(const char * p)48 static uint32 UNALIGNED_LOAD32(const char* p) {
49   uint32 result;
50   memcpy(&result, p, sizeof(result));
51   return result;
52 }
53 
54 #if defined(ARCH_CPU_LITTLE_ENDIAN)
55 
56 #define uint32_in_expected_order(x) (x)
57 #define uint64_in_expected_order(x) (x)
58 
59 #else
60 
61 #if defined(__clang__)
62 
63 // Use builtins where possible. On Windows for instance, this may prevent a
64 // function call instead of emitting a single instruction.
65 #define bswap_32(x) __builtin_bswap32(x)
66 #define bswap_64(x) __builtin_bswap64(x)
67 
68 #elif _MSC_VER
69 #include <stdlib.h>
70 #define bswap_32(x) _byteswap_ulong(x)
71 #define bswap_64(x) _byteswap_uint64(x)
72 
73 #elif defined(__APPLE__)
74 // Mac OS X / Darwin features
75 #include <libkern/OSByteOrder.h>
76 #define bswap_32(x) OSSwapInt32(x)
77 #define bswap_64(x) OSSwapInt64(x)
78 
79 #else
80 #include <byteswap.h>
81 #endif
82 
83 #define uint32_in_expected_order(x) (bswap_32(x))
84 #define uint64_in_expected_order(x) (bswap_64(x))
85 
86 #endif  // defined(ARCH_CPU_LITTLE_ENDIAN)
87 
88 #if !defined(LIKELY)
89 #if HAVE_BUILTIN_EXPECT
90 #define LIKELY(x) (__builtin_expect(!!(x), 1))
91 #else
92 #define LIKELY(x) (x)
93 #endif
94 #endif
95 
Fetch64(const char * p)96 static uint64 Fetch64(const char* p) {
97   return uint64_in_expected_order(UNALIGNED_LOAD64(p));
98 }
99 
Fetch32(const char * p)100 static uint32 Fetch32(const char* p) {
101   return uint32_in_expected_order(UNALIGNED_LOAD32(p));
102 }
103 
104 // Some primes between 2^63 and 2^64 for various uses.
105 static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
106 static const uint64 k1 = 0xb492b66fbe98f273ULL;
107 static const uint64 k2 = 0x9ae16a3b2f90404fULL;
108 static const uint64 k3 = 0xc949d7c7509e6557ULL;
109 
110 // Bitwise right rotate.  Normally this will compile to a single
111 // instruction, especially if the shift is a manifest constant.
Rotate(uint64 val,int shift)112 static uint64 Rotate(uint64 val, int shift) {
113   // Avoid shifting by 64: doing so yields an undefined result.
114   return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
115 }
116 
117 // Equivalent to Rotate(), but requires the second arg to be non-zero.
118 // On x86-64, and probably others, it's possible for this to compile
119 // to a single instruction if both args are already in registers.
RotateByAtLeast1(uint64 val,size_t shift)120 static uint64 RotateByAtLeast1(uint64 val, size_t shift) {
121   return (val >> shift) | (val << (64 - shift));
122 }
123 
ShiftMix(uint64 val)124 static uint64 ShiftMix(uint64 val) {
125   return val ^ (val >> 47);
126 }
127 
HashLen16(uint64 u,uint64 v)128 static uint64 HashLen16(uint64 u, uint64 v) {
129   return Hash128to64(uint128(u, v));
130 }
131 
HashLen0to16(const char * s,size_t len)132 static uint64 HashLen0to16(const char* s, size_t len) {
133   if (len > 8) {
134     uint64 a = Fetch64(s);
135     uint64 b = Fetch64(s + len - 8);
136     return HashLen16(a, RotateByAtLeast1(b + len, len)) ^ b;
137   }
138   if (len >= 4) {
139     uint64 a = Fetch32(s);
140     return HashLen16(len + (a << 3), Fetch32(s + len - 4));
141   }
142   if (len > 0) {
143     uint8 a = static_cast<uint8>(s[0]);
144     uint8 b = static_cast<uint8>(s[len >> 1]);
145     uint8 c = static_cast<uint8>(s[len - 1]);
146     uint32 y = static_cast<uint32>(a) + (static_cast<uint32>(b) << 8);
147     uint32 z = static_cast<uint32>(len) + (static_cast<uint32>(c) << 2);
148     return ShiftMix(y * k2 ^ z * k3) * k2;
149   }
150   return k2;
151 }
152 
153 // This probably works well for 16-byte strings as well, but it may be overkill
154 // in that case.
HashLen17to32(const char * s,size_t len)155 static uint64 HashLen17to32(const char* s, size_t len) {
156   uint64 a = Fetch64(s) * k1;
157   uint64 b = Fetch64(s + 8);
158   uint64 c = Fetch64(s + len - 8) * k2;
159   uint64 d = Fetch64(s + len - 16) * k0;
160   return HashLen16(Rotate(a - b, 43) + Rotate(c, 30) + d,
161                    a + Rotate(b ^ k3, 20) - c + len);
162 }
163 
164 // Return a 16-byte hash for 48 bytes.  Quick and dirty.
165 // Callers do best to use "random-looking" values for a and b.
WeakHashLen32WithSeeds(uint64 w,uint64 x,uint64 y,uint64 z,uint64 a,uint64 b)166 static std::pair<uint64, uint64> WeakHashLen32WithSeeds(uint64 w,
167                                                         uint64 x,
168                                                         uint64 y,
169                                                         uint64 z,
170                                                         uint64 a,
171                                                         uint64 b) {
172   a += w;
173   b = Rotate(b + a + z, 21);
174   uint64 c = a;
175   a += x;
176   a += y;
177   b += Rotate(a, 44);
178   return std::make_pair(a + z, b + c);
179 }
180 
181 // Return a 16-byte hash for s[0] ... s[31], a, and b.  Quick and dirty.
WeakHashLen32WithSeeds(const char * s,uint64 a,uint64 b)182 static std::pair<uint64, uint64> WeakHashLen32WithSeeds(const char* s,
183                                                         uint64 a,
184                                                         uint64 b) {
185   return WeakHashLen32WithSeeds(Fetch64(s), Fetch64(s + 8), Fetch64(s + 16),
186                                 Fetch64(s + 24), a, b);
187 }
188 
189 // Return an 8-byte hash for 33 to 64 bytes.
HashLen33to64(const char * s,size_t len)190 static uint64 HashLen33to64(const char* s, size_t len) {
191   uint64 z = Fetch64(s + 24);
192   uint64 a = Fetch64(s) + (len + Fetch64(s + len - 16)) * k0;
193   uint64 b = Rotate(a + z, 52);
194   uint64 c = Rotate(a, 37);
195   a += Fetch64(s + 8);
196   c += Rotate(a, 7);
197   a += Fetch64(s + 16);
198   uint64 vf = a + z;
199   uint64 vs = b + Rotate(a, 31) + c;
200   a = Fetch64(s + 16) + Fetch64(s + len - 32);
201   z = Fetch64(s + len - 8);
202   b = Rotate(a + z, 52);
203   c = Rotate(a, 37);
204   a += Fetch64(s + len - 24);
205   c += Rotate(a, 7);
206   a += Fetch64(s + len - 16);
207   uint64 wf = a + z;
208   uint64 ws = b + Rotate(a, 31) + c;
209   uint64 r = ShiftMix((vf + ws) * k2 + (wf + vs) * k0);
210   return ShiftMix(r * k0 + vs) * k2;
211 }
212 
CityHash64(const char * s,size_t len)213 uint64 CityHash64(const char* s, size_t len) {
214   if (len <= 32) {
215     if (len <= 16) {
216       return HashLen0to16(s, len);
217     } else {
218       return HashLen17to32(s, len);
219     }
220   } else if (len <= 64) {
221     return HashLen33to64(s, len);
222   }
223 
224   // For strings over 64 bytes we hash the end first, and then as we
225   // loop we keep 56 bytes of state: v, w, x, y, and z.
226   uint64 x = Fetch64(s + len - 40);
227   uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
228   uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
229   std::pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, z);
230   std::pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
231   x = x * k1 + Fetch64(s);
232 
233   // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
234   len = (len - 1) & ~static_cast<size_t>(63);
235   do {
236     x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
237     y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
238     x ^= w.second;
239     y += v.first + Fetch64(s + 40);
240     z = Rotate(z + w.first, 33) * k1;
241     v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
242     w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
243     std::swap(z, x);
244     s += 64;
245     len -= 64;
246   } while (len != 0);
247   return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z,
248                    HashLen16(v.second, w.second) + x);
249 }
250 
CityHash64WithSeed(const char * s,size_t len,uint64 seed)251 uint64 CityHash64WithSeed(const char* s, size_t len, uint64 seed) {
252   return CityHash64WithSeeds(s, len, k2, seed);
253 }
254 
CityHash64WithSeeds(const char * s,size_t len,uint64 seed0,uint64 seed1)255 uint64 CityHash64WithSeeds(const char* s,
256                            size_t len,
257                            uint64 seed0,
258                            uint64 seed1) {
259   return HashLen16(CityHash64(s, len) - seed0, seed1);
260 }
261 
262 // A subroutine for CityHash128().  Returns a decent 128-bit hash for strings
263 // of any length representable in signed long.  Based on City and Murmur.
CityMurmur(const char * s,size_t len,uint128 seed)264 static uint128 CityMurmur(const char* s, size_t len, uint128 seed) {
265   uint64 a = Uint128Low64(seed);
266   uint64 b = Uint128High64(seed);
267   uint64 c = 0;
268   uint64 d = 0;
269   if (len <= 16) {
270     a = ShiftMix(a * k1) * k1;
271     c = b * k1 + HashLen0to16(s, len);
272     d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c));
273   } else {
274     c = HashLen16(Fetch64(s + len - 8) + k1, a);
275     d = HashLen16(b + len, c + Fetch64(s + len - 16));
276     a += d;
277     // len > 16 here, so do...while is safe
278     do {
279       a ^= ShiftMix(Fetch64(s) * k1) * k1;
280       a *= k1;
281       b ^= a;
282       c ^= ShiftMix(Fetch64(s + 8) * k1) * k1;
283       c *= k1;
284       d ^= c;
285       s += 16;
286       len -= 16;
287     } while (len > 16);
288   }
289   a = HashLen16(a, c);
290   b = HashLen16(d, b);
291   return uint128(a ^ b, HashLen16(b, a));
292 }
293 
CityHash128WithSeed(const char * s,size_t len,uint128 seed)294 uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed) {
295   if (len < 128) {
296     return CityMurmur(s, len, seed);
297   }
298 
299   // We expect len >= 128 to be the common case.  Keep 56 bytes of state:
300   // v, w, x, y, and z.
301   std::pair<uint64, uint64> v, w;
302   uint64 x = Uint128Low64(seed);
303   uint64 y = Uint128High64(seed);
304   uint64 z = len * k1;
305   v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s);
306   v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8);
307   w.first = Rotate(y + z, 35) * k1 + x;
308   w.second = Rotate(x + Fetch64(s + 88), 53) * k1;
309 
310   // This is the same inner loop as CityHash64(), manually unrolled.
311   do {
312     x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
313     y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
314     x ^= w.second;
315     y += v.first + Fetch64(s + 40);
316     z = Rotate(z + w.first, 33) * k1;
317     v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
318     w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
319     std::swap(z, x);
320     s += 64;
321     x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
322     y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
323     x ^= w.second;
324     y += v.first + Fetch64(s + 40);
325     z = Rotate(z + w.first, 33) * k1;
326     v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
327     w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
328     std::swap(z, x);
329     s += 64;
330     len -= 128;
331   } while (LIKELY(len >= 128));
332   x += Rotate(v.first + z, 49) * k0;
333   z += Rotate(w.first, 37) * k0;
334   // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
335   for (size_t tail_done = 0; tail_done < len;) {
336     tail_done += 32;
337     y = Rotate(x + y, 42) * k0 + v.second;
338     w.first += Fetch64(s + len - tail_done + 16);
339     x = x * k0 + w.first;
340     z += w.second + Fetch64(s + len - tail_done);
341     w.second += v.first;
342     v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second);
343   }
344   // At this point our 56 bytes of state should contain more than
345   // enough information for a strong 128-bit hash.  We use two
346   // different 56-byte-to-8-byte hashes to get a 16-byte final result.
347   x = HashLen16(x, v.first);
348   y = HashLen16(y + z, w.first);
349   return uint128(HashLen16(x + v.second, w.second) + y,
350                  HashLen16(x + w.second, y + v.second));
351 }
352 
CityHash128(const char * s,size_t len)353 uint128 CityHash128(const char* s, size_t len) {
354   if (len >= 16) {
355     return CityHash128WithSeed(s + 16, len - 16,
356                                uint128(Fetch64(s) ^ k3, Fetch64(s + 8)));
357   } else if (len >= 8) {
358     return CityHash128WithSeed(
359         nullptr, 0,
360         uint128(Fetch64(s) ^ (len * k0), Fetch64(s + len - 8) ^ k1));
361   } else {
362     return CityHash128WithSeed(s, len, uint128(k0, k1));
363   }
364 }
365 
366 }  // namespace cityhash_v103
367 }  // namespace internal
368 }  // namespace base
369