// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
#include <array>
#include <cmath>
#include <cstdint>
#include <deque>
#include <memory>
#include <numeric>
#include <random>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include "absl/base/internal/raw_logging.h"
#include "absl/container/internal/hash_function_defaults.h"
#include "absl/container/internal/raw_hash_set.h"
#include "absl/strings/str_format.h"
#include "benchmark/benchmark.h"

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace container_internal {

struct RawHashSetTestOnlyAccess {
  template <typename C>
  static auto GetSlots(const C& c) -> decltype(c.slots_) {
    return c.slots_;
  }
};

namespace {

struct IntPolicy {
  using slot_type = int64_t;
  using key_type = int64_t;
  using init_type = int64_t;

  static void construct(void*, int64_t* slot, int64_t v) { *slot = v; }
  static void destroy(void*, int64_t*) {}
  static void transfer(void*, int64_t* new_slot, int64_t* old_slot) {
    *new_slot = *old_slot;
  }

  static int64_t& element(slot_type* slot) { return *slot; }

  template <class F>
  static auto apply(F&& f, int64_t x) -> decltype(std::forward<F>(f)(x, x)) {
    return std::forward<F>(f)(x, x);
  }
};

class StringPolicy {
  template <class F, class K, class V,
            class = typename std::enable_if<
                std::is_convertible<const K&, absl::string_view>::value>::type>
  decltype(std::declval<F>()(
      std::declval<const absl::string_view&>(), std::piecewise_construct,
      std::declval<std::tuple<K>>(),
      std::declval<V>())) static apply_impl(F&& f,
                                            std::pair<std::tuple<K>, V> p) {
    const absl::string_view& key = std::get<0>(p.first);
    return std::forward<F>(f)(key, std::piecewise_construct, std::move(p.first),
                              std::move(p.second));
  }

 public:
  struct slot_type {
    struct ctor {};

    template <class... Ts>
    slot_type(ctor, Ts&&... ts) : pair(std::forward<Ts>(ts)...) {}

    std::pair<std::string, std::string> pair;
  };

  using key_type = std::string;
  using init_type = std::pair<std::string, std::string>;

  template <class allocator_type, class... Args>
  static void construct(allocator_type* alloc, slot_type* slot, Args... args) {
    std::allocator_traits<allocator_type>::construct(
        *alloc, slot, typename slot_type::ctor(), std::forward<Args>(args)...);
  }

  template <class allocator_type>
  static void destroy(allocator_type* alloc, slot_type* slot) {
    std::allocator_traits<allocator_type>::destroy(*alloc, slot);
  }

  template <class allocator_type>
  static void transfer(allocator_type* alloc, slot_type* new_slot,
                       slot_type* old_slot) {
    construct(alloc, new_slot, std::move(old_slot->pair));
    destroy(alloc, old_slot);
  }

  static std::pair<std::string, std::string>& element(slot_type* slot) {
    return slot->pair;
  }

  template <class F, class... Args>
  static auto apply(F&& f, Args&&... args)
      -> decltype(apply_impl(std::forward<F>(f),
                             PairArgs(std::forward<Args>(args)...))) {
    return apply_impl(std::forward<F>(f),
                      PairArgs(std::forward<Args>(args)...));
  }
};

struct StringHash : container_internal::hash_default_hash<absl::string_view> {
  using is_transparent = void;
};
struct StringEq : std::equal_to<absl::string_view> {
  using is_transparent = void;
};

struct StringTable
    : raw_hash_set<StringPolicy, StringHash, StringEq, std::allocator<int>> {
  using Base = typename StringTable::raw_hash_set;
  StringTable() {}
  using Base::Base;
};

struct IntTable
    : raw_hash_set<IntPolicy, container_internal::hash_default_hash<int64_t>,
                   std::equal_to<int64_t>, std::allocator<int64_t>> {
  using Base = typename IntTable::raw_hash_set;
  IntTable() {}
  using Base::Base;
};

// Generates random printable-ASCII strings of length `size`.
struct string_generator {
  template <class RNG>
  std::string operator()(RNG& rng) const {
    std::string res;
    res.resize(size);
    std::uniform_int_distribution<uint32_t> printable_ascii(0x20, 0x7E);
    std::generate(res.begin(), res.end(), [&] { return printable_ascii(rng); });
    return res;
  }

  size_t size;
};

// Model a cache in steady state.
//
// On a table of size N, keep deleting the LRU entry and add a random one.
void BM_CacheInSteadyState(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  string_generator gen{12};
  StringTable t;
  std::deque<std::string> keys;
  while (t.size() < state.range(0)) {
    auto x = t.emplace(gen(rng), gen(rng));
    if (x.second) keys.push_back(x.first->first);
  }
  ABSL_RAW_CHECK(state.range(0) >= 10, "");
  while (state.KeepRunning()) {
    // Some cache hits.
    std::deque<std::string>::const_iterator it;
    for (int i = 0; i != 90; ++i) {
      if (i % 10 == 0) it = keys.end();
      ::benchmark::DoNotOptimize(t.find(*--it));
    }
    // Some cache misses.
    for (int i = 0; i != 10; ++i) ::benchmark::DoNotOptimize(t.find(gen(rng)));
    ABSL_RAW_CHECK(t.erase(keys.front()), keys.front().c_str());
    keys.pop_front();
    while (true) {
      auto x = t.emplace(gen(rng), gen(rng));
      if (x.second) {
        keys.push_back(x.first->first);
        break;
      }
    }
  }
  state.SetItemsProcessed(state.iterations());
  state.SetLabel(absl::StrFormat("load_factor=%.2f", t.load_factor()));
}

template <typename Benchmark>
void CacheInSteadyStateArgs(Benchmark* bm) {
  // The default.
  const float max_load_factor = 0.875;
  // When the cache is at the steady state, the probe sequence will equal
  // capacity if there is no reclamation of deleted slots. Pick a number large
  // enough to make the benchmark slow for that case.
  const size_t capacity = 1 << 10;

  // Check N data points to cover load factors in [0.4, 0.8).
  const size_t kNumPoints = 10;
  for (size_t i = 0; i != kNumPoints; ++i)
    bm->Arg(std::ceil(
        capacity * (max_load_factor + i * max_load_factor / kNumPoints) / 2));
}
BENCHMARK(BM_CacheInSteadyState)->Apply(CacheInSteadyStateArgs);

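// Measures the cost of comparing an iterator against end() on a small table.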
void BM_EndComparison(benchmark::State& state) {
  StringTable t = {{"a", "a"}, {"b", "b"}};
  auto it = t.begin();
  for (auto i : state) {
    benchmark::DoNotOptimize(t);
    benchmark::DoNotOptimize(it);
    benchmark::DoNotOptimize(it != t.end());
  }
}
BENCHMARK(BM_EndComparison);

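// Iterates over the whole table. Each ArgPair is (reserved capacity, number of
// elements inserted), so the later pairs exercise empty and sparse tables.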
void BM_Iteration(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  string_generator gen{12};
  StringTable t;

  size_t capacity = state.range(0);
  size_t size = state.range(1);
  t.reserve(capacity);

  while (t.size() < size) {
    t.emplace(gen(rng), gen(rng));
  }

  for (auto i : state) {
    benchmark::DoNotOptimize(t);
    for (auto it = t.begin(); it != t.end(); ++it) {
      benchmark::DoNotOptimize(*it);
    }
  }
}

BENCHMARK(BM_Iteration)
    ->ArgPair(1, 1)
    ->ArgPair(2, 2)
    ->ArgPair(4, 4)
    ->ArgPair(7, 7)
    ->ArgPair(10, 10)
    ->ArgPair(15, 15)
    ->ArgPair(16, 16)
    ->ArgPair(54, 54)
    ->ArgPair(100, 100)
    ->ArgPair(400, 400)
    // empty
    ->ArgPair(0, 0)
    ->ArgPair(10, 0)
    ->ArgPair(100, 0)
    ->ArgPair(1000, 0)
    ->ArgPair(10000, 0)
    // sparse
    ->ArgPair(100, 1)
    ->ArgPair(1000, 10);

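// The next three benchmarks measure copy construction: a sparse int table
// (reserved to 10x its size), a dense int table, and a string table.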
void BM_CopyCtorSparseInt(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  IntTable t;
  std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{});

  size_t size = state.range(0);
  t.reserve(size * 10);
  while (t.size() < size) {
    t.emplace(dist(rng));
  }

  for (auto i : state) {
    IntTable t2 = t;
    benchmark::DoNotOptimize(t2);
  }
}
BENCHMARK(BM_CopyCtorSparseInt)->Range(128, 4096);

void BM_CopyCtorInt(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  IntTable t;
  std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{});

  size_t size = state.range(0);
  while (t.size() < size) {
    t.emplace(dist(rng));
  }

  for (auto i : state) {
    IntTable t2 = t;
    benchmark::DoNotOptimize(t2);
  }
}
BENCHMARK(BM_CopyCtorInt)->Range(128, 4096);

void BM_CopyCtorString(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  StringTable t;
  std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{});

  size_t size = state.range(0);
  while (t.size() < size) {
    t.emplace(std::to_string(dist(rng)), std::to_string(dist(rng)));
  }

  for (auto i : state) {
    StringTable t2 = t;
    benchmark::DoNotOptimize(t2);
  }
}
BENCHMARK(BM_CopyCtorString)->Range(128, 4096);

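// Measures copy assignment into an already-constructed table.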
void BM_CopyAssign(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  IntTable t;
  std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{});
  while (t.size() < state.range(0)) {
    t.emplace(dist(rng));
  }

  IntTable t2;
  for (auto _ : state) {
    t2 = t;
    benchmark::DoNotOptimize(t2);
  }
}
BENCHMARK(BM_CopyAssign)->Range(128, 4096);

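// Measures construction of a table from an iterator range of pre-generated
// values.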
void BM_RangeCtor(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{});
  std::vector<int> values;
  const size_t desired_size = state.range(0);
  while (values.size() < desired_size) {
    values.emplace_back(dist(rng));
  }

  for (auto unused : state) {
    IntTable t{values.begin(), values.end()};
    benchmark::DoNotOptimize(t);
  }
}
BENCHMARK(BM_RangeCtor)->Range(128, 65536);

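// The NoOpReserve benchmarks call reserve() with a capacity the table already
// has, measuring the fixed cost of a reserve that does no rehashing.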
void BM_NoOpReserveIntTable(benchmark::State& state) {
  IntTable t;
  t.reserve(100000);
  for (auto _ : state) {
    benchmark::DoNotOptimize(t);
    t.reserve(100000);
  }
}
BENCHMARK(BM_NoOpReserveIntTable);

void BM_NoOpReserveStringTable(benchmark::State& state) {
  StringTable t;
  t.reserve(100000);
  for (auto _ : state) {
    benchmark::DoNotOptimize(t);
    t.reserve(100000);
  }
}
BENCHMARK(BM_NoOpReserveStringTable);

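// The Reserve benchmarks time reserve() on a freshly constructed empty table;
// table construction is excluded via PauseTiming()/ResumeTiming().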
void BM_ReserveIntTable(benchmark::State& state) {
  int reserve_size = state.range(0);
  for (auto _ : state) {
    state.PauseTiming();
    IntTable t;
    state.ResumeTiming();
    benchmark::DoNotOptimize(t);
    t.reserve(reserve_size);
  }
}
BENCHMARK(BM_ReserveIntTable)->Range(128, 4096);

void BM_ReserveStringTable(benchmark::State& state) {
  int reserve_size = state.range(0);
  for (auto _ : state) {
    state.PauseTiming();
    StringTable t;
    state.ResumeTiming();
    benchmark::DoNotOptimize(t);
    t.reserve(reserve_size);
  }
}
BENCHMARK(BM_ReserveStringTable)->Range(128, 4096);

// Like std::iota, except that ctrl_t doesn't support operator++.
template <typename CtrlIter>
void Iota(CtrlIter begin, CtrlIter end, int value) {
  for (; begin != end; ++begin, ++value) {
    *begin = static_cast<ctrl_t>(value);
  }
}

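// The BM_Group_* benchmarks below measure individual control-group operations
// (Match, MaskEmpty, etc.) on a single group of control bytes filled via Iota.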
void BM_Group_Match(benchmark::State& state) {
  std::array<ctrl_t, Group::kWidth> group;
  Iota(group.begin(), group.end(), -4);
  Group g{group.data()};
  h2_t h = 1;
  for (auto _ : state) {
    ::benchmark::DoNotOptimize(h);
    ::benchmark::DoNotOptimize(g);
    ::benchmark::DoNotOptimize(g.Match(h));
  }
}
BENCHMARK(BM_Group_Match);

void BM_Group_MaskEmpty(benchmark::State& state) {
  std::array<ctrl_t, Group::kWidth> group;
  Iota(group.begin(), group.end(), -4);
  Group g{group.data()};
  for (auto _ : state) {
    ::benchmark::DoNotOptimize(g);
    ::benchmark::DoNotOptimize(g.MaskEmpty());
  }
}
BENCHMARK(BM_Group_MaskEmpty);

void BM_Group_MaskEmptyOrDeleted(benchmark::State& state) {
  std::array<ctrl_t, Group::kWidth> group;
  Iota(group.begin(), group.end(), -4);
  Group g{group.data()};
  for (auto _ : state) {
    ::benchmark::DoNotOptimize(g);
    ::benchmark::DoNotOptimize(g.MaskEmptyOrDeleted());
  }
}
BENCHMARK(BM_Group_MaskEmptyOrDeleted);

void BM_Group_CountLeadingEmptyOrDeleted(benchmark::State& state) {
  std::array<ctrl_t, Group::kWidth> group;
  Iota(group.begin(), group.end(), -2);
  Group g{group.data()};
  for (auto _ : state) {
    ::benchmark::DoNotOptimize(g);
    ::benchmark::DoNotOptimize(g.CountLeadingEmptyOrDeleted());
  }
}
BENCHMARK(BM_Group_CountLeadingEmptyOrDeleted);

void BM_Group_MatchFirstEmptyOrDeleted(benchmark::State& state) {
  std::array<ctrl_t, Group::kWidth> group;
  Iota(group.begin(), group.end(), -2);
  Group g{group.data()};
  for (auto _ : state) {
    ::benchmark::DoNotOptimize(g);
    ::benchmark::DoNotOptimize(g.MaskEmptyOrDeleted().LowestBitSet());
  }
}
BENCHMARK(BM_Group_MatchFirstEmptyOrDeleted);

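// Measures ConvertDeletedToEmptyAndFullToDeleted over a large control array
// filled with a repeating empty/full/deleted pattern. The per-iteration copy
// of the control bytes is excluded from timing.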
void BM_DropDeletes(benchmark::State& state) {
  constexpr size_t capacity = (1 << 20) - 1;
  std::vector<ctrl_t> ctrl(capacity + 1 + Group::kWidth);
  ctrl[capacity] = ctrl_t::kSentinel;
  std::vector<ctrl_t> pattern = {ctrl_t::kEmpty,   static_cast<ctrl_t>(2),
                                 ctrl_t::kDeleted, static_cast<ctrl_t>(2),
                                 ctrl_t::kEmpty,   static_cast<ctrl_t>(1),
                                 ctrl_t::kDeleted};
  for (size_t i = 0; i != capacity; ++i) {
    ctrl[i] = pattern[i % pattern.size()];
  }
  while (state.KeepRunning()) {
    state.PauseTiming();
    std::vector<ctrl_t> ctrl_copy = ctrl;
    state.ResumeTiming();
    ConvertDeletedToEmptyAndFullToDeleted(ctrl_copy.data(), capacity);
    ::benchmark::DoNotOptimize(ctrl_copy[capacity]);
  }
}
BENCHMARK(BM_DropDeletes);

void BM_Resize(benchmark::State& state) {
  // For now just measure a small cheap hash table since we
  // are mostly interested in the overhead of type-erasure
  // in resize().
  constexpr int kElements = 64;
  const int kCapacity = kElements * 2;

  IntTable table;
  for (int i = 0; i < kElements; i++) {
    table.insert(i);
  }
  for (auto unused : state) {
    table.rehash(0);
    table.rehash(kCapacity);
  }
}
BENCHMARK(BM_Resize);

}  // namespace
}  // namespace container_internal
ABSL_NAMESPACE_END
}  // namespace absl

// These methods are here to make it easy to examine the assembly for targeted
// parts of the API.
auto CodegenAbslRawHashSetInt64Find(absl::container_internal::IntTable* table,
                                    int64_t key) -> decltype(table->find(key)) {
  return table->find(key);
}

bool CodegenAbslRawHashSetInt64FindNeEnd(
    absl::container_internal::IntTable* table, int64_t key) {
  return table->find(key) != table->end();
}

// This is useful because the find isn't inlined but the iterator comparison is.
bool CodegenAbslRawHashSetStringFindNeEnd(
    absl::container_internal::StringTable* table, const std::string& key) {
  return table->find(key) != table->end();
}

auto CodegenAbslRawHashSetInt64Insert(absl::container_internal::IntTable* table,
                                      int64_t key)
    -> decltype(table->insert(key)) {
  return table->insert(key);
}

bool CodegenAbslRawHashSetInt64Contains(
    absl::container_internal::IntTable* table, int64_t key) {
  return table->contains(key);
}

void CodegenAbslRawHashSetInt64Iterate(
    absl::container_internal::IntTable* table) {
  for (auto x : *table) benchmark::DoNotOptimize(x);
}

// Take the address of each codegen function so it is odr-used and its code is
// actually emitted for inspection.
int odr =
    (::benchmark::DoNotOptimize(std::make_tuple(
         &CodegenAbslRawHashSetInt64Find, &CodegenAbslRawHashSetInt64FindNeEnd,
         &CodegenAbslRawHashSetStringFindNeEnd,
         &CodegenAbslRawHashSetInt64Insert, &CodegenAbslRawHashSetInt64Contains,
         &CodegenAbslRawHashSetInt64Iterate)),
     1);