1 // Copyright 2018 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <algorithm>
16 #include <cassert>
17 #include <cstddef>
18 #include <cstdint>
19 #include <cstring>
20 #include <string>
21 #include <tuple>
22 #include <type_traits>
23 #include <typeindex>
24 #include <utility>
25 #include <vector>
26
27 #include "absl/base/attributes.h"
28 #include "absl/container/flat_hash_set.h"
29 #include "absl/hash/hash.h"
30 #include "absl/random/random.h"
31 #include "absl/strings/cord.h"
32 #include "absl/strings/cord_test_helpers.h"
33 #include "absl/strings/string_view.h"
34 #include "benchmark/benchmark.h"
35
36 namespace {
37
38 using absl::Hash;
39
40 template <template <typename> class H, typename T>
RunBenchmark(benchmark::State & state,T value)41 void RunBenchmark(benchmark::State& state, T value) {
42 H<T> h;
43 for (auto _ : state) {
44 benchmark::DoNotOptimize(value);
45 benchmark::DoNotOptimize(h(value));
46 }
47 }
48
49 } // namespace
50
// Unary-template alias for absl::Hash so it matches RunBenchmark's
// `template <typename> class H` parameter and pastes cleanly into the
// benchmark/codegen names produced by MAKE_BENCHMARK below.
template <typename T>
using AbslHash = absl::Hash<T>;
53
54 class TypeErasedInterface {
55 public:
56 virtual ~TypeErasedInterface() = default;
57
58 template <typename H>
AbslHashValue(H state,const TypeErasedInterface & wrapper)59 friend H AbslHashValue(H state, const TypeErasedInterface& wrapper) {
60 state = H::combine(std::move(state), std::type_index(typeid(wrapper)));
61 wrapper.HashValue(absl::HashState::Create(&state));
62 return state;
63 }
64
65 private:
66 virtual void HashValue(absl::HashState state) const = 0;
67 };
68
// Hash functor that routes absl::Hash through the TypeErasedInterface above:
// each call wraps `value` and hashes the wrapper, so the value's hashing
// happens behind a virtual call + absl::HashState boundary.
template <typename T>
struct TypeErasedAbslHash {
  class Wrapper : public TypeErasedInterface {
   public:
    explicit Wrapper(const T& value) : value_(value) {}

   private:
    void HashValue(absl::HashState state) const override {
      absl::HashState::combine(std::move(state), value_);
    }

    // Non-owning reference: a Wrapper only lives for the duration of
    // operator() below, where the referenced `value` outlives it.
    const T& value_;
  };

  size_t operator()(const T& value) {
    return absl::Hash<Wrapper>{}(Wrapper(value));
  }
};
87
FlatCord(size_t size)88 absl::Cord FlatCord(size_t size) {
89 absl::Cord result(std::string(size, 'a'));
90 result.Flatten();
91 return result;
92 }
93
FragmentedCord(size_t size)94 absl::Cord FragmentedCord(size_t size) {
95 const size_t orig_size = size;
96 std::vector<std::string> chunks;
97 size_t chunk_size = std::max<size_t>(1, size / 10);
98 while (size > chunk_size) {
99 chunks.push_back(std::string(chunk_size, 'a'));
100 size -= chunk_size;
101 }
102 if (size > 0) {
103 chunks.push_back(std::string(size, 'a'));
104 }
105 absl::Cord result = absl::MakeFragmentedCord(chunks);
106 (void) orig_size;
107 assert(result.size() == orig_size);
108 return result;
109 }
110
// Builds the vector [0, 1, ..., count-1] (values converted to T) used as
// benchmark input.
//
// Fix: reserve up front so construction performs a single allocation instead
// of O(log n) geometric reallocations (clang-tidy:
// performance-inefficient-vector-operation).
template <typename T>
std::vector<T> Vector(size_t count) {
  std::vector<T> result;
  result.reserve(count);
  for (size_t v = 0; v < count; ++v) {
    result.push_back(v);
  }
  return result;
}
119
// Bogus type that replicates an unordered_set's bit mixing, but with
// vector-speed iteration. This is intended to measure the overhead of
// unordered hashing without counting the speed of unordered_set iteration.
template <typename T>
struct FastUnorderedSet {
  // Fills `values` with 0..count-1. Reserves first so setup does a single
  // allocation instead of repeated growth.
  explicit FastUnorderedSet(size_t count) {
    values.reserve(count);
    for (size_t v = 0; v < count; ++v) {
      values.push_back(v);
    }
  }
  std::vector<T> values;

  // Mirrors absl's hashing of unordered containers: order-independent
  // combine of the elements, then the element count mixed in.
  template <typename H>
  friend H AbslHashValue(H h, const FastUnorderedSet& fus) {
    return H::combine(H::combine_unordered(std::move(h), fus.values.begin(),
                                           fus.values.end()),
                      fus.values.size());
  }
};
139
140 template <typename T>
FlatHashSet(size_t count)141 absl::flat_hash_set<T> FlatHashSet(size_t count) {
142 absl::flat_hash_set<T> result;
143 for (size_t v = 0; v < count; ++v) {
144 result.insert(v);
145 }
146 return result;
147 }
148
// Generates a benchmark and a codegen method for the provided types. The
// codegen method provides a well known entrypoint for dumping assembly.
// The trailing `absl_hash_test_odr_use...` global takes the codegen
// function's address through DoNotOptimize, odr-using it so the linker
// cannot strip it from the binary.
#define MAKE_BENCHMARK(hash, name, ...)                          \
  namespace {                                                    \
  void BM_##hash##_##name(benchmark::State& state) {             \
    RunBenchmark<hash>(state, __VA_ARGS__);                      \
  }                                                              \
  BENCHMARK(BM_##hash##_##name);                                 \
  }                                                              \
  size_t Codegen##hash##name(const decltype(__VA_ARGS__)& arg);  \
  size_t Codegen##hash##name(const decltype(__VA_ARGS__)& arg) { \
    return hash<decltype(__VA_ARGS__)>{}(arg);                   \
  }                                                              \
  bool absl_hash_test_odr_use##hash##name =                      \
      (benchmark::DoNotOptimize(&Codegen##hash##name), false)
164
// Throughput benchmarks for direct absl::Hash over scalar, string, Cord,
// and container inputs of varying sizes.
MAKE_BENCHMARK(AbslHash, Int32, int32_t{});
MAKE_BENCHMARK(AbslHash, Int64, int64_t{});
MAKE_BENCHMARK(AbslHash, Double, 1.2);
MAKE_BENCHMARK(AbslHash, DoubleZero, 0.0);
MAKE_BENCHMARK(AbslHash, PairInt32Int32, std::pair<int32_t, int32_t>{});
MAKE_BENCHMARK(AbslHash, PairInt64Int64, std::pair<int64_t, int64_t>{});
MAKE_BENCHMARK(AbslHash, TupleInt32BoolInt64,
               std::tuple<int32_t, bool, int64_t>{});
MAKE_BENCHMARK(AbslHash, String_0, std::string());
MAKE_BENCHMARK(AbslHash, String_10, std::string(10, 'a'));
MAKE_BENCHMARK(AbslHash, String_30, std::string(30, 'a'));
MAKE_BENCHMARK(AbslHash, String_90, std::string(90, 'a'));
MAKE_BENCHMARK(AbslHash, String_200, std::string(200, 'a'));
MAKE_BENCHMARK(AbslHash, String_5000, std::string(5000, 'a'));
MAKE_BENCHMARK(AbslHash, Cord_Flat_0, absl::Cord());
MAKE_BENCHMARK(AbslHash, Cord_Flat_10, FlatCord(10));
MAKE_BENCHMARK(AbslHash, Cord_Flat_30, FlatCord(30));
MAKE_BENCHMARK(AbslHash, Cord_Flat_90, FlatCord(90));
MAKE_BENCHMARK(AbslHash, Cord_Flat_200, FlatCord(200));
MAKE_BENCHMARK(AbslHash, Cord_Flat_5000, FlatCord(5000));
MAKE_BENCHMARK(AbslHash, Cord_Fragmented_200, FragmentedCord(200));
MAKE_BENCHMARK(AbslHash, Cord_Fragmented_5000, FragmentedCord(5000));
MAKE_BENCHMARK(AbslHash, VectorInt64_10, Vector<int64_t>(10));
MAKE_BENCHMARK(AbslHash, VectorInt64_100, Vector<int64_t>(100));
MAKE_BENCHMARK(AbslHash, VectorInt64_1000, Vector<int64_t>(1000));
MAKE_BENCHMARK(AbslHash, VectorDouble_10, Vector<double>(10));
MAKE_BENCHMARK(AbslHash, VectorDouble_100, Vector<double>(100));
MAKE_BENCHMARK(AbslHash, VectorDouble_1000, Vector<double>(1000));
MAKE_BENCHMARK(AbslHash, FlatHashSetInt64_10, FlatHashSet<int64_t>(10));
MAKE_BENCHMARK(AbslHash, FlatHashSetInt64_100, FlatHashSet<int64_t>(100));
MAKE_BENCHMARK(AbslHash, FlatHashSetInt64_1000, FlatHashSet<int64_t>(1000));
MAKE_BENCHMARK(AbslHash, FlatHashSetDouble_10, FlatHashSet<double>(10));
MAKE_BENCHMARK(AbslHash, FlatHashSetDouble_100, FlatHashSet<double>(100));
MAKE_BENCHMARK(AbslHash, FlatHashSetDouble_1000, FlatHashSet<double>(1000));
MAKE_BENCHMARK(AbslHash, FastUnorderedSetInt64_1000,
               FastUnorderedSet<int64_t>(1000));
MAKE_BENCHMARK(AbslHash, FastUnorderedSetDouble_1000,
               FastUnorderedSet<double>(1000));
MAKE_BENCHMARK(AbslHash, PairStringString_0,
               std::make_pair(std::string(), std::string()));
MAKE_BENCHMARK(AbslHash, PairStringString_10,
               std::make_pair(std::string(10, 'a'), std::string(10, 'b')));
MAKE_BENCHMARK(AbslHash, PairStringString_30,
               std::make_pair(std::string(30, 'a'), std::string(30, 'b')));
MAKE_BENCHMARK(AbslHash, PairStringString_90,
               std::make_pair(std::string(90, 'a'), std::string(90, 'b')));
MAKE_BENCHMARK(AbslHash, PairStringString_200,
               std::make_pair(std::string(200, 'a'), std::string(200, 'b')));
MAKE_BENCHMARK(AbslHash, PairStringString_5000,
               std::make_pair(std::string(5000, 'a'), std::string(5000, 'b')));
215
// The same inputs hashed through the TypeErasedAbslHash functor, to compare
// against the direct AbslHash benchmarks above.
MAKE_BENCHMARK(TypeErasedAbslHash, Int32, int32_t{});
MAKE_BENCHMARK(TypeErasedAbslHash, Int64, int64_t{});
MAKE_BENCHMARK(TypeErasedAbslHash, PairInt32Int32,
               std::pair<int32_t, int32_t>{});
MAKE_BENCHMARK(TypeErasedAbslHash, PairInt64Int64,
               std::pair<int64_t, int64_t>{});
MAKE_BENCHMARK(TypeErasedAbslHash, TupleInt32BoolInt64,
               std::tuple<int32_t, bool, int64_t>{});
MAKE_BENCHMARK(TypeErasedAbslHash, String_0, std::string());
MAKE_BENCHMARK(TypeErasedAbslHash, String_10, std::string(10, 'a'));
MAKE_BENCHMARK(TypeErasedAbslHash, String_30, std::string(30, 'a'));
MAKE_BENCHMARK(TypeErasedAbslHash, String_90, std::string(90, 'a'));
MAKE_BENCHMARK(TypeErasedAbslHash, String_200, std::string(200, 'a'));
MAKE_BENCHMARK(TypeErasedAbslHash, String_5000, std::string(5000, 'a'));
MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_10,
               std::vector<double>(10, 1.1));
MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_100,
               std::vector<double>(100, 1.1));
MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_1000,
               std::vector<double>(1000, 1.1));
MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetInt64_10,
               FlatHashSet<int64_t>(10));
MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetInt64_100,
               FlatHashSet<int64_t>(100));
MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetInt64_1000,
               FlatHashSet<int64_t>(1000));
MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetDouble_10,
               FlatHashSet<double>(10));
MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetDouble_100,
               FlatHashSet<double>(100));
MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetDouble_1000,
               FlatHashSet<double>(1000));
MAKE_BENCHMARK(TypeErasedAbslHash, FastUnorderedSetInt64_1000,
               FastUnorderedSet<int64_t>(1000));
MAKE_BENCHMARK(TypeErasedAbslHash, FastUnorderedSetDouble_1000,
               FastUnorderedSet<double>(1000));
252
253 // The latency benchmark attempts to model the speed of the hash function in
254 // production. When a hash function is used for hashtable lookups it is rarely
255 // used to hash N items in a tight loop nor on constant sized strings. Instead,
256 // after hashing there is a potential equality test plus a (usually) large
257 // amount of user code. To simulate this effectively we introduce a data
258 // dependency between elements we hash by using the hash of the Nth element as
259 // the selector of the N+1th element to hash. This isolates the hash function
260 // code much like in production. As a bonus we use the hash to generate strings
261 // of size [1,N] (instead of fixed N) to disable perfect branch predictions in
262 // hash function implementations.
263 namespace {
264 // 16kb fits in L1 cache of most CPUs we care about. Keeping memory latency low
265 // will allow us to attribute most time to CPU which means more accurate
266 // measurements.
267 static constexpr size_t kEntropySize = 16 << 10;
268 static char entropy[kEntropySize + 1024];
__anone9a4b86f0302null269 ABSL_ATTRIBUTE_UNUSED static const bool kInitialized = [] {
270 absl::BitGen gen;
271 static_assert(sizeof(entropy) % sizeof(uint64_t) == 0, "");
272 for (int i = 0; i != sizeof(entropy); i += sizeof(uint64_t)) {
273 auto rand = absl::Uniform<uint64_t>(gen);
274 memcpy(&entropy[i], &rand, sizeof(uint64_t));
275 }
276 return true;
277 }();
278 } // namespace
279
280 template <class T>
281 struct PodRand {
282 static_assert(std::is_pod<T>::value, "");
283 static_assert(kEntropySize + sizeof(T) < sizeof(entropy), "");
284
GetPodRand285 T Get(size_t i) const {
286 T v;
287 memcpy(&v, &entropy[i % kEntropySize], sizeof(T));
288 return v;
289 }
290 };
291
292 template <size_t N>
293 struct StringRand {
294 static_assert(kEntropySize + N < sizeof(entropy), "");
295
GetStringRand296 absl::string_view Get(size_t i) const {
297 // This has a small bias towards small numbers. Because max N is ~200 this
298 // is very small and prefer to be very fast instead of absolutely accurate.
299 // Also we pass N = 2^K+1 so that mod reduces to a bitand.
300 size_t s = (i % (N - 1)) + 1;
301 return {&entropy[i % kEntropySize], s};
302 }
303 };
304
// Generates a latency benchmark: the hash of element N is used as the
// selector of element N+1 (`i = h(r.Get(i))`), creating a data dependency
// that serializes successive hash computations, per the discussion above.
#define MAKE_LATENCY_BENCHMARK(hash, name, ...)              \
  namespace {                                                \
  void BM_latency_##hash##_##name(benchmark::State& state) { \
    __VA_ARGS__ r;                                           \
    hash<decltype(r.Get(0))> h;                              \
    size_t i = 871401241;                                    \
    for (auto _ : state) {                                   \
      benchmark::DoNotOptimize(i = h(r.Get(i)));             \
    }                                                        \
  }                                                          \
  BENCHMARK(BM_latency_##hash##_##name);                     \
  }  // namespace

MAKE_LATENCY_BENCHMARK(AbslHash, Int32, PodRand<int32_t>)
MAKE_LATENCY_BENCHMARK(AbslHash, Int64, PodRand<int64_t>)
MAKE_LATENCY_BENCHMARK(AbslHash, String9, StringRand<9>)
MAKE_LATENCY_BENCHMARK(AbslHash, String33, StringRand<33>)
MAKE_LATENCY_BENCHMARK(AbslHash, String65, StringRand<65>)
MAKE_LATENCY_BENCHMARK(AbslHash, String257, StringRand<257>)
324