xref: /aosp_15_r20/external/cronet/third_party/abseil-cpp/absl/hash/hash_benchmark.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14 
15 #include <algorithm>
16 #include <cassert>
17 #include <cstddef>
18 #include <cstdint>
19 #include <cstring>
20 #include <string>
21 #include <tuple>
22 #include <type_traits>
23 #include <typeindex>
24 #include <utility>
25 #include <vector>
26 
27 #include "absl/base/attributes.h"
28 #include "absl/container/flat_hash_set.h"
29 #include "absl/hash/hash.h"
30 #include "absl/random/random.h"
31 #include "absl/strings/cord.h"
32 #include "absl/strings/cord_test_helpers.h"
33 #include "absl/strings/string_view.h"
34 #include "benchmark/benchmark.h"
35 
36 namespace {
37 
38 using absl::Hash;
39 
40 template <template <typename> class H, typename T>
RunBenchmark(benchmark::State & state,T value)41 void RunBenchmark(benchmark::State& state, T value) {
42   H<T> h;
43   for (auto _ : state) {
44     benchmark::DoNotOptimize(value);
45     benchmark::DoNotOptimize(h(value));
46   }
47 }
48 
49 }  // namespace
50 
51 template <typename T>
52 using AbslHash = absl::Hash<T>;
53 
54 class TypeErasedInterface {
55  public:
56   virtual ~TypeErasedInterface() = default;
57 
58   template <typename H>
AbslHashValue(H state,const TypeErasedInterface & wrapper)59   friend H AbslHashValue(H state, const TypeErasedInterface& wrapper) {
60     state = H::combine(std::move(state), std::type_index(typeid(wrapper)));
61     wrapper.HashValue(absl::HashState::Create(&state));
62     return state;
63   }
64 
65  private:
66   virtual void HashValue(absl::HashState state) const = 0;
67 };
68 
69 template <typename T>
70 struct TypeErasedAbslHash {
71   class Wrapper : public TypeErasedInterface {
72    public:
Wrapper(const T & value)73     explicit Wrapper(const T& value) : value_(value) {}
74 
75    private:
HashValue(absl::HashState state) const76     void HashValue(absl::HashState state) const override {
77       absl::HashState::combine(std::move(state), value_);
78     }
79 
80     const T& value_;
81   };
82 
operator ()TypeErasedAbslHash83   size_t operator()(const T& value) {
84     return absl::Hash<Wrapper>{}(Wrapper(value));
85   }
86 };
87 
FlatCord(size_t size)88 absl::Cord FlatCord(size_t size) {
89   absl::Cord result(std::string(size, 'a'));
90   result.Flatten();
91   return result;
92 }
93 
FragmentedCord(size_t size)94 absl::Cord FragmentedCord(size_t size) {
95   const size_t orig_size = size;
96   std::vector<std::string> chunks;
97   size_t chunk_size = std::max<size_t>(1, size / 10);
98   while (size > chunk_size) {
99     chunks.push_back(std::string(chunk_size, 'a'));
100     size -= chunk_size;
101   }
102   if (size > 0) {
103     chunks.push_back(std::string(size, 'a'));
104   }
105   absl::Cord result = absl::MakeFragmentedCord(chunks);
106   (void) orig_size;
107   assert(result.size() == orig_size);
108   return result;
109 }
110 
// Returns a vector containing the values [0, count) converted to T.
template <typename T>
std::vector<T> Vector(size_t count) {
  std::vector<T> result;
  result.reserve(count);  // One allocation instead of O(log n) regrowths.
  for (size_t v = 0; v < count; ++v) {
    result.push_back(static_cast<T>(v));
  }
  return result;
}
119 
// Bogus type that replicates an unordered_set's bit mixing, but with
// vector-speed iteration. This is intended to measure the overhead of
// unordered hashing without counting the speed of unordered_set iteration.
template <typename T>
struct FastUnorderedSet {
  explicit FastUnorderedSet(size_t count) {
    values.reserve(count);
    for (size_t v = 0; v < count; ++v) {
      values.push_back(static_cast<T>(v));
    }
  }
  std::vector<T> values;

  // Mirrors the hash expansion of the unordered containers: an unordered
  // combine of the elements followed by the element count.
  template <typename H>
  friend H AbslHashValue(H h, const FastUnorderedSet& fus) {
    return H::combine(H::combine_unordered(std::move(h), fus.values.begin(),
                                           fus.values.end()),
                      fus.values.size());
  }
};
139 
140 template <typename T>
FlatHashSet(size_t count)141 absl::flat_hash_set<T> FlatHashSet(size_t count) {
142   absl::flat_hash_set<T> result;
143   for (size_t v = 0; v < count; ++v) {
144     result.insert(v);
145   }
146   return result;
147 }
148 
// Generates a benchmark and a codegen method for the provided types.  The
// codegen method provides a well known entrypoint for dumping assembly.  The
// trailing odr-use variable takes the Codegen function's address so the
// definition cannot be discarded by the linker.
#define MAKE_BENCHMARK(hash, name, ...)                          \
  namespace {                                                    \
  void BM_##hash##_##name(benchmark::State& state) {             \
    RunBenchmark<hash>(state, __VA_ARGS__);                      \
  }                                                              \
  BENCHMARK(BM_##hash##_##name);                                 \
  }                                                              \
  size_t Codegen##hash##name(const decltype(__VA_ARGS__)& arg);  \
  size_t Codegen##hash##name(const decltype(__VA_ARGS__)& arg) { \
    return hash<decltype(__VA_ARGS__)>{}(arg);                   \
  }                                                              \
  bool absl_hash_test_odr_use##hash##name =                      \
      (benchmark::DoNotOptimize(&Codegen##hash##name), false)
164 
165 MAKE_BENCHMARK(AbslHash, Int32, int32_t{});
166 MAKE_BENCHMARK(AbslHash, Int64, int64_t{});
167 MAKE_BENCHMARK(AbslHash, Double, 1.2);
168 MAKE_BENCHMARK(AbslHash, DoubleZero, 0.0);
169 MAKE_BENCHMARK(AbslHash, PairInt32Int32, std::pair<int32_t, int32_t>{});
170 MAKE_BENCHMARK(AbslHash, PairInt64Int64, std::pair<int64_t, int64_t>{});
171 MAKE_BENCHMARK(AbslHash, TupleInt32BoolInt64,
172                std::tuple<int32_t, bool, int64_t>{});
173 MAKE_BENCHMARK(AbslHash, String_0, std::string());
174 MAKE_BENCHMARK(AbslHash, String_10, std::string(10, 'a'));
175 MAKE_BENCHMARK(AbslHash, String_30, std::string(30, 'a'));
176 MAKE_BENCHMARK(AbslHash, String_90, std::string(90, 'a'));
177 MAKE_BENCHMARK(AbslHash, String_200, std::string(200, 'a'));
178 MAKE_BENCHMARK(AbslHash, String_5000, std::string(5000, 'a'));
179 MAKE_BENCHMARK(AbslHash, Cord_Flat_0, absl::Cord());
180 MAKE_BENCHMARK(AbslHash, Cord_Flat_10, FlatCord(10));
181 MAKE_BENCHMARK(AbslHash, Cord_Flat_30, FlatCord(30));
182 MAKE_BENCHMARK(AbslHash, Cord_Flat_90, FlatCord(90));
183 MAKE_BENCHMARK(AbslHash, Cord_Flat_200, FlatCord(200));
184 MAKE_BENCHMARK(AbslHash, Cord_Flat_5000, FlatCord(5000));
185 MAKE_BENCHMARK(AbslHash, Cord_Fragmented_200, FragmentedCord(200));
186 MAKE_BENCHMARK(AbslHash, Cord_Fragmented_5000, FragmentedCord(5000));
187 MAKE_BENCHMARK(AbslHash, VectorInt64_10, Vector<int64_t>(10));
188 MAKE_BENCHMARK(AbslHash, VectorInt64_100, Vector<int64_t>(100));
189 MAKE_BENCHMARK(AbslHash, VectorInt64_1000, Vector<int64_t>(1000));
190 MAKE_BENCHMARK(AbslHash, VectorDouble_10, Vector<double>(10));
191 MAKE_BENCHMARK(AbslHash, VectorDouble_100, Vector<double>(100));
192 MAKE_BENCHMARK(AbslHash, VectorDouble_1000, Vector<double>(1000));
193 MAKE_BENCHMARK(AbslHash, FlatHashSetInt64_10, FlatHashSet<int64_t>(10));
194 MAKE_BENCHMARK(AbslHash, FlatHashSetInt64_100, FlatHashSet<int64_t>(100));
195 MAKE_BENCHMARK(AbslHash, FlatHashSetInt64_1000, FlatHashSet<int64_t>(1000));
196 MAKE_BENCHMARK(AbslHash, FlatHashSetDouble_10, FlatHashSet<double>(10));
197 MAKE_BENCHMARK(AbslHash, FlatHashSetDouble_100, FlatHashSet<double>(100));
198 MAKE_BENCHMARK(AbslHash, FlatHashSetDouble_1000, FlatHashSet<double>(1000));
199 MAKE_BENCHMARK(AbslHash, FastUnorderedSetInt64_1000,
200                FastUnorderedSet<int64_t>(1000));
201 MAKE_BENCHMARK(AbslHash, FastUnorderedSetDouble_1000,
202                FastUnorderedSet<double>(1000));
203 MAKE_BENCHMARK(AbslHash, PairStringString_0,
204                std::make_pair(std::string(), std::string()));
205 MAKE_BENCHMARK(AbslHash, PairStringString_10,
206                std::make_pair(std::string(10, 'a'), std::string(10, 'b')));
207 MAKE_BENCHMARK(AbslHash, PairStringString_30,
208                std::make_pair(std::string(30, 'a'), std::string(30, 'b')));
209 MAKE_BENCHMARK(AbslHash, PairStringString_90,
210                std::make_pair(std::string(90, 'a'), std::string(90, 'b')));
211 MAKE_BENCHMARK(AbslHash, PairStringString_200,
212                std::make_pair(std::string(200, 'a'), std::string(200, 'b')));
213 MAKE_BENCHMARK(AbslHash, PairStringString_5000,
214                std::make_pair(std::string(5000, 'a'), std::string(5000, 'b')));
215 
216 MAKE_BENCHMARK(TypeErasedAbslHash, Int32, int32_t{});
217 MAKE_BENCHMARK(TypeErasedAbslHash, Int64, int64_t{});
218 MAKE_BENCHMARK(TypeErasedAbslHash, PairInt32Int32,
219                std::pair<int32_t, int32_t>{});
220 MAKE_BENCHMARK(TypeErasedAbslHash, PairInt64Int64,
221                std::pair<int64_t, int64_t>{});
222 MAKE_BENCHMARK(TypeErasedAbslHash, TupleInt32BoolInt64,
223                std::tuple<int32_t, bool, int64_t>{});
224 MAKE_BENCHMARK(TypeErasedAbslHash, String_0, std::string());
225 MAKE_BENCHMARK(TypeErasedAbslHash, String_10, std::string(10, 'a'));
226 MAKE_BENCHMARK(TypeErasedAbslHash, String_30, std::string(30, 'a'));
227 MAKE_BENCHMARK(TypeErasedAbslHash, String_90, std::string(90, 'a'));
228 MAKE_BENCHMARK(TypeErasedAbslHash, String_200, std::string(200, 'a'));
229 MAKE_BENCHMARK(TypeErasedAbslHash, String_5000, std::string(5000, 'a'));
230 MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_10,
231                std::vector<double>(10, 1.1));
232 MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_100,
233                std::vector<double>(100, 1.1));
234 MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_1000,
235                std::vector<double>(1000, 1.1));
236 MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetInt64_10,
237                FlatHashSet<int64_t>(10));
238 MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetInt64_100,
239                FlatHashSet<int64_t>(100));
240 MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetInt64_1000,
241                FlatHashSet<int64_t>(1000));
242 MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetDouble_10,
243                FlatHashSet<double>(10));
244 MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetDouble_100,
245                FlatHashSet<double>(100));
246 MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetDouble_1000,
247                FlatHashSet<double>(1000));
248 MAKE_BENCHMARK(TypeErasedAbslHash, FastUnorderedSetInt64_1000,
249                FastUnorderedSet<int64_t>(1000));
250 MAKE_BENCHMARK(TypeErasedAbslHash, FastUnorderedSetDouble_1000,
251                FastUnorderedSet<double>(1000));
252 
253 // The latency benchmark attempts to model the speed of the hash function in
254 // production. When a hash function is used for hashtable lookups it is rarely
255 // used to hash N items in a tight loop nor on constant sized strings. Instead,
256 // after hashing there is a potential equality test plus a (usually) large
257 // amount of user code. To simulate this effectively we introduce a data
258 // dependency between elements we hash by using the hash of the Nth element as
259 // the selector of the N+1th element to hash. This isolates the hash function
260 // code much like in production. As a bonus we use the hash to generate strings
261 // of size [1,N] (instead of fixed N) to disable perfect branch predictions in
262 // hash function implementations.
263 namespace {
264 // 16kb fits in L1 cache of most CPUs we care about. Keeping memory latency low
265 // will allow us to attribute most time to CPU which means more accurate
266 // measurements.
267 static constexpr size_t kEntropySize = 16 << 10;
268 static char entropy[kEntropySize + 1024];
__anone9a4b86f0302null269 ABSL_ATTRIBUTE_UNUSED static const bool kInitialized = [] {
270   absl::BitGen gen;
271   static_assert(sizeof(entropy) % sizeof(uint64_t) == 0, "");
272   for (int i = 0; i != sizeof(entropy); i += sizeof(uint64_t)) {
273     auto rand = absl::Uniform<uint64_t>(gen);
274     memcpy(&entropy[i], &rand, sizeof(uint64_t));
275   }
276   return true;
277 }();
278 }  // namespace
279 
280 template <class T>
281 struct PodRand {
282   static_assert(std::is_pod<T>::value, "");
283   static_assert(kEntropySize + sizeof(T) < sizeof(entropy), "");
284 
GetPodRand285   T Get(size_t i) const {
286     T v;
287     memcpy(&v, &entropy[i % kEntropySize], sizeof(T));
288     return v;
289   }
290 };
291 
292 template <size_t N>
293 struct StringRand {
294   static_assert(kEntropySize + N < sizeof(entropy), "");
295 
GetStringRand296   absl::string_view Get(size_t i) const {
297     // This has a small bias towards small numbers. Because max N is ~200 this
298     // is very small and prefer to be very fast instead of absolutely accurate.
299     // Also we pass N = 2^K+1 so that mod reduces to a bitand.
300     size_t s = (i % (N - 1)) + 1;
301     return {&entropy[i % kEntropySize], s};
302   }
303 };
304 
// Generates a latency benchmark: the hash of element i selects element i+1,
// so each iteration depends on the previous hash result.  This measures hash
// latency rather than throughput.
#define MAKE_LATENCY_BENCHMARK(hash, name, ...)              \
  namespace {                                                \
  void BM_latency_##hash##_##name(benchmark::State& state) { \
    __VA_ARGS__ r;                                           \
    hash<decltype(r.Get(0))> h;                              \
    size_t i = 871401241;                                    \
    for (auto _ : state) {                                   \
      benchmark::DoNotOptimize(i = h(r.Get(i)));             \
    }                                                        \
  }                                                          \
  BENCHMARK(BM_latency_##hash##_##name);                     \
  }  // namespace
317 
318 MAKE_LATENCY_BENCHMARK(AbslHash, Int32, PodRand<int32_t>)
319 MAKE_LATENCY_BENCHMARK(AbslHash, Int64, PodRand<int64_t>)
320 MAKE_LATENCY_BENCHMARK(AbslHash, String9, StringRand<9>)
321 MAKE_LATENCY_BENCHMARK(AbslHash, String33, StringRand<33>)
322 MAKE_LATENCY_BENCHMARK(AbslHash, String65, StringRand<65>)
323 MAKE_LATENCY_BENCHMARK(AbslHash, String257, StringRand<257>)
324