xref: /aosp_15_r20/external/icing/icing/text_classifier/lib3/utils/hash/farmhash.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_TEXT_CLASSIFIER_LIB3_UTILS_HASH_FARMHASH_H_
16 #define ICING_TEXT_CLASSIFIER_LIB3_UTILS_HASH_FARMHASH_H_
17 
18 #include <assert.h>
19 #include <stdint.h>
20 #include <stdlib.h>
21 #include <string.h>   // for memcpy and memset
22 #include <utility>
23 
24 #ifndef NAMESPACE_FOR_HASH_FUNCTIONS
25 #define NAMESPACE_FOR_HASH_FUNCTIONS tc3farmhash
26 #endif
27 
28 namespace NAMESPACE_FOR_HASH_FUNCTIONS {
29 
30 #if defined(FARMHASH_UINT128_T_DEFINED)
Uint128Low64(const uint128_t x)31 inline uint64_t Uint128Low64(const uint128_t x) {
32   return static_cast<uint64_t>(x);
33 }
Uint128High64(const uint128_t x)34 inline uint64_t Uint128High64(const uint128_t x) {
35   return static_cast<uint64_t>(x >> 64);
36 }
Uint128(uint64_t lo,uint64_t hi)37 inline uint128_t Uint128(uint64_t lo, uint64_t hi) {
38   return lo + (((uint128_t)hi) << 64);
39 }
40 #else
41 typedef std::pair<uint64_t, uint64_t> uint128_t;
42 inline uint64_t Uint128Low64(const uint128_t x) { return x.first; }
43 inline uint64_t Uint128High64(const uint128_t x) { return x.second; }
44 inline uint128_t Uint128(uint64_t lo, uint64_t hi) { return uint128_t(lo, hi); }
45 #endif
46 
47 
48 // BASIC STRING HASHING
49 
50 // Hash function for a byte array.
51 // May change from time to time, may differ on different platforms, may differ
52 // depending on NDEBUG.
53 size_t Hash(const char* s, size_t len);
54 
55 // Hash function for a byte array.  Most useful in 32-bit binaries.
56 // May change from time to time, may differ on different platforms, may differ
57 // depending on NDEBUG.
58 uint32_t Hash32(const char* s, size_t len);
59 
60 // Hash function for a byte array.  For convenience, a 32-bit seed is also
61 // hashed into the result.
62 // May change from time to time, may differ on different platforms, may differ
63 // depending on NDEBUG.
64 uint32_t Hash32WithSeed(const char* s, size_t len, uint32_t seed);
65 
66 // Hash 128 input bits down to 64 bits of output.
67 // Hash function for a byte array.
68 // May change from time to time, may differ on different platforms, may differ
69 // depending on NDEBUG.
70 uint64_t Hash64(const char* s, size_t len);
71 
72 // Hash function for a byte array.  For convenience, a 64-bit seed is also
73 // hashed into the result.
74 // May change from time to time, may differ on different platforms, may differ
75 // depending on NDEBUG.
76 uint64_t Hash64WithSeed(const char* s, size_t len, uint64_t seed);
77 
78 // Hash function for a byte array.  For convenience, two seeds are also
79 // hashed into the result.
80 // May change from time to time, may differ on different platforms, may differ
81 // depending on NDEBUG.
82 uint64_t Hash64WithSeeds(const char* s, size_t len,
83                        uint64_t seed0, uint64_t seed1);
84 
85 // Hash function for a byte array.
86 // May change from time to time, may differ on different platforms, may differ
87 // depending on NDEBUG.
88 uint128_t Hash128(const char* s, size_t len);
89 
90 // Hash function for a byte array.  For convenience, a 128-bit seed is also
91 // hashed into the result.
92 // May change from time to time, may differ on different platforms, may differ
93 // depending on NDEBUG.
94 uint128_t Hash128WithSeed(const char* s, size_t len, uint128_t seed);
95 
96 // BASIC NON-STRING HASHING
97 
98 // This is intended to be a reasonably good hash function.
99 // May change from time to time, may differ on different platforms, may differ
100 // depending on NDEBUG.
Hash128to64(uint128_t x)101 inline uint64_t Hash128to64(uint128_t x) {
102   // Murmur-inspired hashing.
103   const uint64_t kMul = 0x9ddfea08eb382d69ULL;
104   uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
105   a ^= (a >> 47);
106   uint64_t b = (Uint128High64(x) ^ a) * kMul;
107   b ^= (b >> 47);
108   b *= kMul;
109   return b;
110 }
111 
112 // FINGERPRINTING (i.e., good, portable, forever-fixed hash functions)
113 
114 // Fingerprint function for a byte array.  Most useful in 32-bit binaries.
115 uint32_t Fingerprint32(const char* s, size_t len);
116 
117 // Fingerprint function for a byte array.
118 uint64_t Fingerprint64(const char* s, size_t len);
119 
120 // Fingerprint function for a byte array.
121 uint128_t Fingerprint128(const char* s, size_t len);
122 
123 // This is intended to be a good fingerprinting primitive.
124 // See below for more overloads.
Fingerprint(uint128_t x)125 inline uint64_t Fingerprint(uint128_t x) {
126   // Murmur-inspired hashing.
127   const uint64_t kMul = 0x9ddfea08eb382d69ULL;
128   uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
129   a ^= (a >> 47);
130   uint64_t b = (Uint128High64(x) ^ a) * kMul;
131   b ^= (b >> 44);
132   b *= kMul;
133   b ^= (b >> 41);
134   b *= kMul;
135   return b;
136 }
137 
138 // This is intended to be a good fingerprinting primitive.
Fingerprint(uint64_t x)139 inline uint64_t Fingerprint(uint64_t x) {
140   // Murmur-inspired hashing.
141   const uint64_t kMul = 0x9ddfea08eb382d69ULL;
142   uint64_t b = x * kMul;
143   b ^= (b >> 44);
144   b *= kMul;
145   b ^= (b >> 41);
146   b *= kMul;
147   return b;
148 }
149 
150 #ifndef FARMHASH_NO_CXX_STRING
151 
152 // Convenience functions to hash or fingerprint C++ strings.
153 // These require that Str::data() return a pointer to the first char
154 // (as a const char*) and that Str::length() return the string's length;
155 // they work with std::string, for example.
156 
157 // Hash function for a byte array.  Most useful in 32-bit binaries.
158 // May change from time to time, may differ on different platforms, may differ
159 // depending on NDEBUG.
160 template <typename Str>
Hash(const Str & s)161 inline size_t Hash(const Str& s) {
162   assert(sizeof(s[0]) == 1);
163   return Hash(s.data(), s.length());
164 }
165 
166 // Hash function for a byte array.  Most useful in 32-bit binaries.
167 // May change from time to time, may differ on different platforms, may differ
168 // depending on NDEBUG.
169 template <typename Str>
Hash32(const Str & s)170 inline uint32_t Hash32(const Str& s) {
171   assert(sizeof(s[0]) == 1);
172   return Hash32(s.data(), s.length());
173 }
174 
175 // Hash function for a byte array.  For convenience, a 32-bit seed is also
176 // hashed into the result.
177 // May change from time to time, may differ on different platforms, may differ
178 // depending on NDEBUG.
179 template <typename Str>
Hash32WithSeed(const Str & s,uint32_t seed)180 inline uint32_t Hash32WithSeed(const Str& s, uint32_t seed) {
181   assert(sizeof(s[0]) == 1);
182   return Hash32WithSeed(s.data(), s.length(), seed);
183 }
184 
185 // Hash 128 input bits down to 64 bits of output.
186 // Hash function for a byte array.
187 // May change from time to time, may differ on different platforms, may differ
188 // depending on NDEBUG.
189 template <typename Str>
Hash64(const Str & s)190 inline uint64_t Hash64(const Str& s) {
191   assert(sizeof(s[0]) == 1);
192   return Hash64(s.data(), s.length());
193 }
194 
195 // Hash function for a byte array.  For convenience, a 64-bit seed is also
196 // hashed into the result.
197 // May change from time to time, may differ on different platforms, may differ
198 // depending on NDEBUG.
199 template <typename Str>
Hash64WithSeed(const Str & s,uint64_t seed)200 inline uint64_t Hash64WithSeed(const Str& s, uint64_t seed) {
201   assert(sizeof(s[0]) == 1);
202   return Hash64WithSeed(s.data(), s.length(), seed);
203 }
204 
205 // Hash function for a byte array.  For convenience, two seeds are also
206 // hashed into the result.
207 // May change from time to time, may differ on different platforms, may differ
208 // depending on NDEBUG.
209 template <typename Str>
Hash64WithSeeds(const Str & s,uint64_t seed0,uint64_t seed1)210 inline uint64_t Hash64WithSeeds(const Str& s, uint64_t seed0, uint64_t seed1) {
211   assert(sizeof(s[0]) == 1);
212   return Hash64WithSeeds(s.data(), s.length(), seed0, seed1);
213 }
214 
215 // Hash function for a byte array.
216 // May change from time to time, may differ on different platforms, may differ
217 // depending on NDEBUG.
218 template <typename Str>
Hash128(const Str & s)219 inline uint128_t Hash128(const Str& s) {
220   assert(sizeof(s[0]) == 1);
221   return Hash128(s.data(), s.length());
222 }
223 
224 // Hash function for a byte array.  For convenience, a 128-bit seed is also
225 // hashed into the result.
226 // May change from time to time, may differ on different platforms, may differ
227 // depending on NDEBUG.
228 template <typename Str>
Hash128WithSeed(const Str & s,uint128_t seed)229 inline uint128_t Hash128WithSeed(const Str& s, uint128_t seed) {
230   assert(sizeof(s[0]) == 1);
231   return Hash128(s.data(), s.length(), seed);
232 }
233 
234 // FINGERPRINTING (i.e., good, portable, forever-fixed hash functions)
235 
236 // Fingerprint function for a byte array.  Most useful in 32-bit binaries.
237 template <typename Str>
Fingerprint32(const Str & s)238 inline uint32_t Fingerprint32(const Str& s) {
239   assert(sizeof(s[0]) == 1);
240   return Fingerprint32(s.data(), s.length());
241 }
242 
243 // Fingerprint 128 input bits down to 64 bits of output.
244 // Fingerprint function for a byte array.
245 template <typename Str>
Fingerprint64(const Str & s)246 inline uint64_t Fingerprint64(const Str& s) {
247   assert(sizeof(s[0]) == 1);
248   return Fingerprint64(s.data(), s.length());
249 }
250 
251 // Fingerprint function for a byte array.
252 template <typename Str>
Fingerprint128(const Str & s)253 inline uint128_t Fingerprint128(const Str& s) {
254   assert(sizeof(s[0]) == 1);
255   return Fingerprint128(s.data(), s.length());
256 }
257 
258 #endif
259 
260 }  // namespace NAMESPACE_FOR_HASH_FUNCTIONS
261 
262 #endif  // ICING_TEXT_CLASSIFIER_LIB3_UTILS_HASH_FARMHASH_H_
263