xref: /aosp_15_r20/external/cronet/base/hash/hash.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1*6777b538SAndroid Build Coastguard Worker // Copyright 2014 The Chromium Authors
2*6777b538SAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be
3*6777b538SAndroid Build Coastguard Worker // found in the LICENSE file.
4*6777b538SAndroid Build Coastguard Worker 
5*6777b538SAndroid Build Coastguard Worker #include "base/hash/hash.h"
6*6777b538SAndroid Build Coastguard Worker 
7*6777b538SAndroid Build Coastguard Worker #include <string_view>
8*6777b538SAndroid Build Coastguard Worker 
9*6777b538SAndroid Build Coastguard Worker #include "base/check_op.h"
10*6777b538SAndroid Build Coastguard Worker #include "base/notreached.h"
11*6777b538SAndroid Build Coastguard Worker #include "base/rand_util.h"
12*6777b538SAndroid Build Coastguard Worker #include "base/third_party/cityhash/city.h"
13*6777b538SAndroid Build Coastguard Worker #include "build/build_config.h"
14*6777b538SAndroid Build Coastguard Worker 
// SuperFastHash() is implemented in
// base/third_party/superfasthash/superfasthash.c. That third-party source ships
// without a header of its own, so its prototype is declared here instead.
// Note: Blink carries the same algorithm in Source/wtf/StringHasher.h.
extern "C" uint32_t SuperFastHash(const char* data, int len);
19*6777b538SAndroid Build Coastguard Worker 
20*6777b538SAndroid Build Coastguard Worker namespace base {
21*6777b538SAndroid Build Coastguard Worker 
22*6777b538SAndroid Build Coastguard Worker namespace {
23*6777b538SAndroid Build Coastguard Worker 
FastHashImpl(base::span<const uint8_t> data)24*6777b538SAndroid Build Coastguard Worker size_t FastHashImpl(base::span<const uint8_t> data) {
25*6777b538SAndroid Build Coastguard Worker   // We use the updated CityHash within our namespace (not the deprecated
26*6777b538SAndroid Build Coastguard Worker   // version from third_party/smhasher).
27*6777b538SAndroid Build Coastguard Worker   if constexpr (sizeof(size_t) > 4) {
28*6777b538SAndroid Build Coastguard Worker     return base::internal::cityhash_v111::CityHash64(
29*6777b538SAndroid Build Coastguard Worker         reinterpret_cast<const char*>(data.data()), data.size());
30*6777b538SAndroid Build Coastguard Worker   } else {
31*6777b538SAndroid Build Coastguard Worker     return base::internal::cityhash_v111::CityHash32(
32*6777b538SAndroid Build Coastguard Worker         reinterpret_cast<const char*>(data.data()), data.size());
33*6777b538SAndroid Build Coastguard Worker   }
34*6777b538SAndroid Build Coastguard Worker }
35*6777b538SAndroid Build Coastguard Worker 
// Hashes a pair of integers that are each at most 32 bits wide.
//
// The two inputs are first packed into a single 64-bit word (`value1` in the
// upper half, `value2` in the lower half). When size_t can hold 64 bits that
// word is the result. When size_t is narrower, the 64-bit code is reduced with
// multiply-add hashing, as described in Theorem 4.3.3 of the thesis "Über die
// Komplexität der Multiplikation in eingeschränkten Branchingprogrammmodellen"
// by Woelfel:
//
//   h32(x32, y32) = (h64(x32, y32) * rand_odd64 + rand16 * 2^16) % 2^64 / 2^32
//
// Contact danakj@chromium.org for any questions.
size_t HashInts32Impl(uint32_t value1, uint32_t value2) {
  const uint64_t packed = (static_cast<uint64_t>(value1) << 32) | value2;

  if constexpr (sizeof(size_t) >= sizeof(uint64_t)) {
    return static_cast<size_t>(packed);
  } else {
    // rand_odd64 and (rand16 * 2^16) from the formula above.
    constexpr uint64_t kOddMultiplier =
        (uint64_t{481046412} << 32) | uint64_t{1025306955};
    constexpr uint32_t kAddend = 10121U << 16;
    // Number of low bits to discard so that only the top
    // sizeof(size_t) bytes of the 64-bit product remain.
    constexpr size_t kDropBits = 8 * (sizeof(uint64_t) - sizeof(size_t));

    const uint64_t mixed = packed * kOddMultiplier + kAddend;
    return static_cast<size_t>(mixed >> kDropBits);
  }
}
60*6777b538SAndroid Build Coastguard Worker 
// Hashes a pair of integers that are each up to 64 bits wide.
//
// A 64-bit hash code is produced with the compound integer hash method: the
// two 64-bit inputs are split into four 32-bit words, each word is multiplied
// by its own fixed 32-bit constant, and the four 64-bit products are summed.
// See:
// http://opendatastructures.org/versions/edition-0.1d/ods-java/node33.html#SECTION00832000000000000000
//
// When size_t is narrower than 64 bits, the sum is then reduced with the same
// multiply-add step that HashInts32Impl() uses (with different constants).
size_t HashInts64Impl(uint64_t value1, uint64_t value2) {
  // One multiplier per 32-bit word of input.
  constexpr uint32_t kMulValue1Low = 842304669U;
  constexpr uint32_t kMulValue1High = 619063811U;
  constexpr uint32_t kMulValue2Low = 937041849U;
  constexpr uint32_t kMulValue2High = 3309708029U;

  // Each word is held in a 64-bit variable so that the multiplications below
  // are carried out at 64-bit width.
  const uint64_t value1_low = static_cast<uint32_t>(value1);
  const uint64_t value1_high = value1 >> 32;
  const uint64_t value2_low = static_cast<uint32_t>(value2);
  const uint64_t value2_high = value2 >> 32;

  uint64_t combined = value1_low * kMulValue1Low;
  combined += value1_high * kMulValue1High;
  combined += value2_low * kMulValue2Low;
  combined += value2_high * kMulValue2High;

  if constexpr (sizeof(size_t) >= sizeof(uint64_t)) {
    return static_cast<size_t>(combined);
  } else {
    constexpr uint64_t kOddMultiplier =
        (uint64_t{1578233944} << 32) | uint64_t{194370989};
    constexpr uint32_t kAddend = 20591U << 16;
    // Number of low bits to discard so that only the top
    // sizeof(size_t) bytes of the 64-bit product remain.
    constexpr size_t kDropBits = 8 * (sizeof(uint64_t) - sizeof(size_t));

    const uint64_t mixed = combined * kOddMultiplier + kAddend;
    return static_cast<size_t>(mixed >> kDropBits);
  }
}
95*6777b538SAndroid Build Coastguard Worker 
96*6777b538SAndroid Build Coastguard Worker // The random seed is used to perturb the output of base::FastHash() and
97*6777b538SAndroid Build Coastguard Worker // base::HashInts() so that it is only deterministic within the lifetime of a
98*6777b538SAndroid Build Coastguard Worker // process. This prevents inadvertent dependencies on the underlying
99*6777b538SAndroid Build Coastguard Worker // implementation, e.g. anything that persists the hash value and expects it to
100*6777b538SAndroid Build Coastguard Worker // be unchanging will break.
101*6777b538SAndroid Build Coastguard Worker //
102*6777b538SAndroid Build Coastguard Worker // Note: this is the same trick absl uses to generate a random seed. This is
103*6777b538SAndroid Build Coastguard Worker // more robust than using base::RandBytes(), which can fail inside a sandboxed
104*6777b538SAndroid Build Coastguard Worker // environment. Note that without ASLR, the seed won't be quite as random...
105*6777b538SAndroid Build Coastguard Worker #if DCHECK_IS_ON()
106*6777b538SAndroid Build Coastguard Worker constexpr const void* kSeed = &kSeed;
107*6777b538SAndroid Build Coastguard Worker #endif
108*6777b538SAndroid Build Coastguard Worker 
109*6777b538SAndroid Build Coastguard Worker template <typename T>
Scramble(T input)110*6777b538SAndroid Build Coastguard Worker T Scramble(T input) {
111*6777b538SAndroid Build Coastguard Worker #if DCHECK_IS_ON()
112*6777b538SAndroid Build Coastguard Worker   return HashInts64Impl(input, reinterpret_cast<uintptr_t>(kSeed));
113*6777b538SAndroid Build Coastguard Worker #else
114*6777b538SAndroid Build Coastguard Worker   return input;
115*6777b538SAndroid Build Coastguard Worker #endif
116*6777b538SAndroid Build Coastguard Worker }
117*6777b538SAndroid Build Coastguard Worker 
118*6777b538SAndroid Build Coastguard Worker }  // namespace
119*6777b538SAndroid Build Coastguard Worker 
FastHash(base::span<const uint8_t> data)120*6777b538SAndroid Build Coastguard Worker size_t FastHash(base::span<const uint8_t> data) {
121*6777b538SAndroid Build Coastguard Worker   return Scramble(FastHashImpl(data));
122*6777b538SAndroid Build Coastguard Worker }
123*6777b538SAndroid Build Coastguard Worker 
Hash(base::span<const uint8_t> data)124*6777b538SAndroid Build Coastguard Worker uint32_t Hash(base::span<const uint8_t> data) {
125*6777b538SAndroid Build Coastguard Worker   // Currently our in-memory hash is the same as the persistent hash. The
126*6777b538SAndroid Build Coastguard Worker   // split between in-memory and persistent hash functions is maintained to
127*6777b538SAndroid Build Coastguard Worker   // allow the in-memory hash function to be updated in the future.
128*6777b538SAndroid Build Coastguard Worker   return PersistentHash(data);
129*6777b538SAndroid Build Coastguard Worker }
130*6777b538SAndroid Build Coastguard Worker 
Hash(const std::string & str)131*6777b538SAndroid Build Coastguard Worker uint32_t Hash(const std::string& str) {
132*6777b538SAndroid Build Coastguard Worker   return PersistentHash(as_byte_span(str));
133*6777b538SAndroid Build Coastguard Worker }
134*6777b538SAndroid Build Coastguard Worker 
PersistentHash(span<const uint8_t> data)135*6777b538SAndroid Build Coastguard Worker uint32_t PersistentHash(span<const uint8_t> data) {
136*6777b538SAndroid Build Coastguard Worker   // This hash function must not change, since it is designed to be persistable
137*6777b538SAndroid Build Coastguard Worker   // to disk.
138*6777b538SAndroid Build Coastguard Worker   if (data.size() > static_cast<size_t>(std::numeric_limits<int>::max())) {
139*6777b538SAndroid Build Coastguard Worker     NOTREACHED();
140*6777b538SAndroid Build Coastguard Worker     return 0;
141*6777b538SAndroid Build Coastguard Worker   }
142*6777b538SAndroid Build Coastguard Worker   return ::SuperFastHash(reinterpret_cast<const char*>(data.data()),
143*6777b538SAndroid Build Coastguard Worker                          static_cast<int>(data.size()));
144*6777b538SAndroid Build Coastguard Worker }
145*6777b538SAndroid Build Coastguard Worker 
PersistentHash(const void * data,size_t length)146*6777b538SAndroid Build Coastguard Worker uint32_t PersistentHash(const void* data, size_t length) {
147*6777b538SAndroid Build Coastguard Worker   return PersistentHash(make_span(static_cast<const uint8_t*>(data), length));
148*6777b538SAndroid Build Coastguard Worker }
149*6777b538SAndroid Build Coastguard Worker 
PersistentHash(std::string_view str)150*6777b538SAndroid Build Coastguard Worker uint32_t PersistentHash(std::string_view str) {
151*6777b538SAndroid Build Coastguard Worker   return PersistentHash(as_bytes(make_span(str)));
152*6777b538SAndroid Build Coastguard Worker }
153*6777b538SAndroid Build Coastguard Worker 
HashInts32(uint32_t value1,uint32_t value2)154*6777b538SAndroid Build Coastguard Worker size_t HashInts32(uint32_t value1, uint32_t value2) {
155*6777b538SAndroid Build Coastguard Worker   return Scramble(HashInts32Impl(value1, value2));
156*6777b538SAndroid Build Coastguard Worker }
157*6777b538SAndroid Build Coastguard Worker 
HashInts64(uint64_t value1,uint64_t value2)158*6777b538SAndroid Build Coastguard Worker size_t HashInts64(uint64_t value1, uint64_t value2) {
159*6777b538SAndroid Build Coastguard Worker   return Scramble(HashInts64Impl(value1, value2));
160*6777b538SAndroid Build Coastguard Worker }
161*6777b538SAndroid Build Coastguard Worker 
162*6777b538SAndroid Build Coastguard Worker }  // namespace base
163