// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
#include <array>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <deque>
#include <functional>
#include <memory>
#include <numeric>
#include <random>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>

#include "absl/base/internal/raw_logging.h"
#include "absl/container/internal/hash_function_defaults.h"
#include "absl/container/internal/raw_hash_set.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
#include "benchmark/benchmark.h"

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace container_internal {

struct RawHashSetTestOnlyAccess {
  template <typename C>
  static auto GetSlots(const C& c) -> decltype(c.slots_) {
    return c.slots_;
  }
};

namespace {

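// Minimal raw_hash_set policy for a set of int64_t: the slot stores the key
// itself, so construct/destroy/transfer are trivial.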
struct IntPolicy {
  using slot_type = int64_t;
  using key_type = int64_t;
  using init_type = int64_t;

  static void construct(void*, int64_t* slot, int64_t v) { *slot = v; }
  static void destroy(void*, int64_t*) {}
  static void transfer(void*, int64_t* new_slot, int64_t* old_slot) {
    *new_slot = *old_slot;
  }

  static int64_t& element(slot_type* slot) { return *slot; }

  template <class F>
  static auto apply(F&& f, int64_t x) -> decltype(std::forward<F>(f)(x, x)) {
    return std::forward<F>(f)(x, x);
  }
};

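// raw_hash_set policy for a map from std::string to std::string. The slot
// holds the pair inline; apply() exposes the key as an absl::string_view, so
// lookups work with any string-like key type.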
class StringPolicy {
  template <class F, class K, class V,
            class = typename std::enable_if<
                std::is_convertible<const K&, absl::string_view>::value>::type>
  decltype(std::declval<F>()(
      std::declval<const absl::string_view&>(), std::piecewise_construct,
      std::declval<std::tuple<K>>(),
      std::declval<V>())) static apply_impl(F&& f,
                                            std::pair<std::tuple<K>, V> p) {
    const absl::string_view& key = std::get<0>(p.first);
    return std::forward<F>(f)(key, std::piecewise_construct, std::move(p.first),
                              std::move(p.second));
  }

 public:
  struct slot_type {
    struct ctor {};

    template <class... Ts>
    slot_type(ctor, Ts&&... ts) : pair(std::forward<Ts>(ts)...) {}

    std::pair<std::string, std::string> pair;
  };

  using key_type = std::string;
  using init_type = std::pair<std::string, std::string>;

  template <class allocator_type, class... Args>
  static void construct(allocator_type* alloc, slot_type* slot, Args... args) {
    std::allocator_traits<allocator_type>::construct(
        *alloc, slot, typename slot_type::ctor(), std::forward<Args>(args)...);
  }

  template <class allocator_type>
  static void destroy(allocator_type* alloc, slot_type* slot) {
    std::allocator_traits<allocator_type>::destroy(*alloc, slot);
  }

  template <class allocator_type>
  static void transfer(allocator_type* alloc, slot_type* new_slot,
                       slot_type* old_slot) {
    construct(alloc, new_slot, std::move(old_slot->pair));
    destroy(alloc, old_slot);
  }

  static std::pair<std::string, std::string>& element(slot_type* slot) {
    return slot->pair;
  }

  template <class F, class... Args>
  static auto apply(F&& f, Args&&... args)
      -> decltype(apply_impl(std::forward<F>(f),
                             PairArgs(std::forward<Args>(args)...))) {
    return apply_impl(std::forward<F>(f),
                      PairArgs(std::forward<Args>(args)...));
  }
};

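// Transparent hash and equality functors so StringTable lookups accept any
// string-like key without first constructing a std::string.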
struct StringHash : container_internal::hash_default_hash<absl::string_view> {
  using is_transparent = void;
};
struct StringEq : std::equal_to<absl::string_view> {
  using is_transparent = void;
};

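// The two table types exercised below: a string -> string map and an int64_t
// set, each a thin wrapper over raw_hash_set.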
struct StringTable
    : raw_hash_set<StringPolicy, StringHash, StringEq, std::allocator<int>> {
  using Base = typename StringTable::raw_hash_set;
  StringTable() {}
  using Base::Base;
};

struct IntTable
    : raw_hash_set<IntPolicy, container_internal::hash_default_hash<int64_t>,
                   std::equal_to<int64_t>, std::allocator<int64_t>> {
  using Base = typename IntTable::raw_hash_set;
  IntTable() {}
  using Base::Base;
};

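// Generates a random string of printable ASCII of length `size`.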
struct string_generator {
  template <class RNG>
  std::string operator()(RNG& rng) const {
    std::string res;
    res.resize(size);
    std::uniform_int_distribution<uint32_t> printable_ascii(0x20, 0x7E);
    std::generate(res.begin(), res.end(), [&] { return printable_ascii(rng); });
    return res;
  }

  size_t size;
};

// Model a cache in steady state.
//
// On a table of size N, keep deleting the LRU entry and adding a random one.
void BM_CacheInSteadyState(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  string_generator gen{12};
  StringTable t;
  std::deque<std::string> keys;
  while (t.size() < state.range(0)) {
    auto x = t.emplace(gen(rng), gen(rng));
    if (x.second) keys.push_back(x.first->first);
  }
  ABSL_RAW_CHECK(state.range(0) >= 10, "");
  while (state.KeepRunning()) {
    // Some cache hits.
    std::deque<std::string>::const_iterator it;
    for (int i = 0; i != 90; ++i) {
      if (i % 10 == 0) it = keys.end();
      ::benchmark::DoNotOptimize(t.find(*--it));
    }
    // Some cache misses.
    for (int i = 0; i != 10; ++i) ::benchmark::DoNotOptimize(t.find(gen(rng)));
    ABSL_RAW_CHECK(t.erase(keys.front()), keys.front().c_str());
    keys.pop_front();
    while (true) {
      auto x = t.emplace(gen(rng), gen(rng));
      if (x.second) {
        keys.push_back(x.first->first);
        break;
      }
    }
  }
  state.SetItemsProcessed(state.iterations());
  state.SetLabel(absl::StrFormat("load_factor=%.2f", t.load_factor()));
}

template <typename Benchmark>
void CacheInSteadyStateArgs(Benchmark* bm) {
  // The default.
  const float max_load_factor = 0.875;
  // When the cache is at the steady state, the probe sequence will equal
  // capacity if there is no reclamation of deleted slots. Pick a number large
  // enough to make the benchmark slow for that case.
  const size_t capacity = 1 << 10;

  // Check kNumPoints data points to cover load factors in roughly
  // [0.44, 0.83].
  const size_t kNumPoints = 10;
  for (size_t i = 0; i != kNumPoints; ++i)
    bm->Arg(std::ceil(
        capacity * (max_load_factor + i * max_load_factor / kNumPoints) / 2));
}
BENCHMARK(BM_CacheInSteadyState)->Apply(CacheInSteadyStateArgs);

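// Measures the bare cost of the `it != end()` comparison on the iteration
// hot path.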
void BM_EndComparison(benchmark::State& state) {
  StringTable t = {{"a", "a"}, {"b", "b"}};
  auto it = t.begin();
  for (auto _ : state) {
    benchmark::DoNotOptimize(t);
    benchmark::DoNotOptimize(it);
    benchmark::DoNotOptimize(it != t.end());
  }
}
BENCHMARK(BM_EndComparison);

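// Args are (capacity to reserve, number of elements to insert), so sparsely
// filled tables can be measured separately from full ones.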
void BM_Iteration(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  string_generator gen{12};
  StringTable t;

  size_t capacity = state.range(0);
  size_t size = state.range(1);
  t.reserve(capacity);

  while (t.size() < size) {
    t.emplace(gen(rng), gen(rng));
  }

  for (auto _ : state) {
    benchmark::DoNotOptimize(t);
    for (auto it = t.begin(); it != t.end(); ++it) {
      benchmark::DoNotOptimize(*it);
    }
  }
}

BENCHMARK(BM_Iteration)
    ->ArgPair(1, 1)
    ->ArgPair(2, 2)
    ->ArgPair(4, 4)
    ->ArgPair(7, 7)
    ->ArgPair(10, 10)
    ->ArgPair(15, 15)
    ->ArgPair(16, 16)
    ->ArgPair(54, 54)
    ->ArgPair(100, 100)
    ->ArgPair(400, 400)
    // empty
    ->ArgPair(0, 0)
    ->ArgPair(10, 0)
    ->ArgPair(100, 0)
    ->ArgPair(1000, 0)
    ->ArgPair(10000, 0)
    // sparse
    ->ArgPair(100, 1)
    ->ArgPair(1000, 10);

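// Copy construction at a low load factor: the backing array is reserved to
// ~10x the element count before copying.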
void BM_CopyCtorSparseInt(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  IntTable t;
  std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{});

  size_t size = state.range(0);
  t.reserve(size * 10);
  while (t.size() < size) {
    t.emplace(dist(rng));
  }

  for (auto _ : state) {
    IntTable t2 = t;
    benchmark::DoNotOptimize(t2);
  }
}
BENCHMARK(BM_CopyCtorSparseInt)->Range(128, 4096);

void BM_CopyCtorInt(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  IntTable t;
  std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{});

  size_t size = state.range(0);
  while (t.size() < size) {
    t.emplace(dist(rng));
  }

  for (auto _ : state) {
    IntTable t2 = t;
    benchmark::DoNotOptimize(t2);
  }
}
BENCHMARK(BM_CopyCtorInt)->Range(128, 4096);

void BM_CopyCtorString(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  StringTable t;
  std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{});

  size_t size = state.range(0);
  while (t.size() < size) {
    t.emplace(std::to_string(dist(rng)), std::to_string(dist(rng)));
  }

  for (auto _ : state) {
    StringTable t2 = t;
    benchmark::DoNotOptimize(t2);
  }
}
BENCHMARK(BM_CopyCtorString)->Range(128, 4096);

void BM_CopyAssign(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  IntTable t;
  std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{});
  while (t.size() < state.range(0)) {
    t.emplace(dist(rng));
  }

  IntTable t2;
  for (auto _ : state) {
    t2 = t;
    benchmark::DoNotOptimize(t2);
  }
}
BENCHMARK(BM_CopyAssign)->Range(128, 4096);

void BM_RangeCtor(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{});
  std::vector<int> values;
  const size_t desired_size = state.range(0);
  while (values.size() < desired_size) {
    values.emplace_back(dist(rng));
  }

  for (auto unused : state) {
    IntTable t{values.begin(), values.end()};
    benchmark::DoNotOptimize(t);
  }
}
BENCHMARK(BM_RangeCtor)->Range(128, 65536);

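// reserve() at or below the current capacity should be nearly free; the two
// benchmarks below measure that fast path.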
void BM_NoOpReserveIntTable(benchmark::State& state) {
  IntTable t;
  t.reserve(100000);
  for (auto _ : state) {
    benchmark::DoNotOptimize(t);
    t.reserve(100000);
  }
}
BENCHMARK(BM_NoOpReserveIntTable);

void BM_NoOpReserveStringTable(benchmark::State& state) {
  StringTable t;
  t.reserve(100000);
  for (auto _ : state) {
    benchmark::DoNotOptimize(t);
    t.reserve(100000);
  }
}
BENCHMARK(BM_NoOpReserveStringTable);

void BM_ReserveIntTable(benchmark::State& state) {
  int reserve_size = state.range(0);
  for (auto _ : state) {
    state.PauseTiming();
    IntTable t;
    state.ResumeTiming();
    benchmark::DoNotOptimize(t);
    t.reserve(reserve_size);
  }
}
BENCHMARK(BM_ReserveIntTable)->Range(128, 4096);

void BM_ReserveStringTable(benchmark::State& state) {
  int reserve_size = state.range(0);
  for (auto _ : state) {
    state.PauseTiming();
    StringTable t;
    state.ResumeTiming();
    benchmark::DoNotOptimize(t);
    t.reserve(reserve_size);
  }
}
BENCHMARK(BM_ReserveStringTable)->Range(128, 4096);

// Like std::iota, except that ctrl_t doesn't support operator++.
template <typename CtrlIter>
void Iota(CtrlIter begin, CtrlIter end, int value) {
  for (; begin != end; ++begin, ++value) {
    *begin = static_cast<ctrl_t>(value);
  }
}

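// The Group benchmarks below time a single probe over one group of
// Group::kWidth control bytes, the inner step of every lookup.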
void BM_Group_Match(benchmark::State& state) {
  std::array<ctrl_t, Group::kWidth> group;
  Iota(group.begin(), group.end(), -4);
  Group g{group.data()};
  h2_t h = 1;
  for (auto _ : state) {
    ::benchmark::DoNotOptimize(h);
    ::benchmark::DoNotOptimize(g);
    ::benchmark::DoNotOptimize(g.Match(h));
  }
}
BENCHMARK(BM_Group_Match);

void BM_Group_MaskEmpty(benchmark::State& state) {
  std::array<ctrl_t, Group::kWidth> group;
  Iota(group.begin(), group.end(), -4);
  Group g{group.data()};
  for (auto _ : state) {
    ::benchmark::DoNotOptimize(g);
    ::benchmark::DoNotOptimize(g.MaskEmpty());
  }
}
BENCHMARK(BM_Group_MaskEmpty);

void BM_Group_MaskEmptyOrDeleted(benchmark::State& state) {
  std::array<ctrl_t, Group::kWidth> group;
  Iota(group.begin(), group.end(), -4);
  Group g{group.data()};
  for (auto _ : state) {
    ::benchmark::DoNotOptimize(g);
    ::benchmark::DoNotOptimize(g.MaskEmptyOrDeleted());
  }
}
BENCHMARK(BM_Group_MaskEmptyOrDeleted);

void BM_Group_CountLeadingEmptyOrDeleted(benchmark::State& state) {
  std::array<ctrl_t, Group::kWidth> group;
  Iota(group.begin(), group.end(), -2);
  Group g{group.data()};
  for (auto _ : state) {
    ::benchmark::DoNotOptimize(g);
    ::benchmark::DoNotOptimize(g.CountLeadingEmptyOrDeleted());
  }
}
BENCHMARK(BM_Group_CountLeadingEmptyOrDeleted);

void BM_Group_MatchFirstEmptyOrDeleted(benchmark::State& state) {
  std::array<ctrl_t, Group::kWidth> group;
  Iota(group.begin(), group.end(), -2);
  Group g{group.data()};
  for (auto _ : state) {
    ::benchmark::DoNotOptimize(g);
    ::benchmark::DoNotOptimize(g.MaskEmptyOrDeleted().LowestBitSet());
  }
}
BENCHMARK(BM_Group_MatchFirstEmptyOrDeleted);

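// Measures ConvertDeletedToEmptyAndFullToDeleted over a ~1M-entry control
// array, the control-byte rewrite used when dropping tombstones.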
void BM_DropDeletes(benchmark::State& state) {
  constexpr size_t capacity = (1 << 20) - 1;
  std::vector<ctrl_t> ctrl(capacity + 1 + Group::kWidth);
  ctrl[capacity] = ctrl_t::kSentinel;
  std::vector<ctrl_t> pattern = {ctrl_t::kEmpty,   static_cast<ctrl_t>(2),
                                 ctrl_t::kDeleted, static_cast<ctrl_t>(2),
                                 ctrl_t::kEmpty,   static_cast<ctrl_t>(1),
                                 ctrl_t::kDeleted};
  for (size_t i = 0; i != capacity; ++i) {
    ctrl[i] = pattern[i % pattern.size()];
  }
  while (state.KeepRunning()) {
    state.PauseTiming();
    std::vector<ctrl_t> ctrl_copy = ctrl;
    state.ResumeTiming();
    ConvertDeletedToEmptyAndFullToDeleted(ctrl_copy.data(), capacity);
    ::benchmark::DoNotOptimize(ctrl_copy[capacity]);
  }
}
BENCHMARK(BM_DropDeletes);

void BM_Resize(benchmark::State& state) {
  // For now, just measure a small, cheap hash table, since we are mostly
  // interested in the overhead of type erasure in resize().
  constexpr int kElements = 64;
  const int kCapacity = kElements * 2;

  IntTable table;
  for (int i = 0; i < kElements; i++) {
    table.insert(i);
  }
  for (auto unused : state) {
    table.rehash(0);
    table.rehash(kCapacity);
  }
}
BENCHMARK(BM_Resize);

}  // namespace
}  // namespace container_internal
ABSL_NAMESPACE_END
}  // namespace absl

// These functions are here to make it easy to examine the assembly for
// targeted parts of the API.
auto CodegenAbslRawHashSetInt64Find(absl::container_internal::IntTable* table,
                                    int64_t key) -> decltype(table->find(key)) {
  return table->find(key);
}

bool CodegenAbslRawHashSetInt64FindNeEnd(
    absl::container_internal::IntTable* table, int64_t key) {
  return table->find(key) != table->end();
}

// This is useful because the find isn't inlined but the iterator comparison is.
bool CodegenAbslRawHashSetStringFindNeEnd(
    absl::container_internal::StringTable* table, const std::string& key) {
  return table->find(key) != table->end();
}

auto CodegenAbslRawHashSetInt64Insert(absl::container_internal::IntTable* table,
                                      int64_t key)
    -> decltype(table->insert(key)) {
  return table->insert(key);
}

bool CodegenAbslRawHashSetInt64Contains(
    absl::container_internal::IntTable* table, int64_t key) {
  return table->contains(key);
}

void CodegenAbslRawHashSetInt64Iterate(
    absl::container_internal::IntTable* table) {
  for (auto x : *table) benchmark::DoNotOptimize(x);
}

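// Odr-use the codegen functions above so the compiler and linker must emit
// them even though nothing calls them.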
int odr =
    (::benchmark::DoNotOptimize(std::make_tuple(
         &CodegenAbslRawHashSetInt64Find, &CodegenAbslRawHashSetInt64FindNeEnd,
         &CodegenAbslRawHashSetStringFindNeEnd,
         &CodegenAbslRawHashSetInt64Insert, &CodegenAbslRawHashSetInt64Contains,
         &CodegenAbslRawHashSetInt64Iterate)),
     1);