1 // Copyright 2018 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Generates probe length statistics for many combinations of key types and key
16 // distributions, all using the default hash function for swisstable.
17
18 #include <memory>
19 #include <regex> // NOLINT
20 #include <vector>
21
22 #include "absl/container/flat_hash_map.h"
23 #include "absl/container/internal/hash_function_defaults.h"
24 #include "absl/container/internal/hashtable_debug.h"
25 #include "absl/container/internal/raw_hash_set.h"
26 #include "absl/random/distributions.h"
27 #include "absl/random/random.h"
28 #include "absl/strings/str_cat.h"
29 #include "absl/strings/str_format.h"
30 #include "absl/strings/string_view.h"
31 #include "absl/strings/strip.h"
32
33 namespace {
34
35 enum class OutputStyle { kRegular, kBenchmark };
36
37 // The --benchmark command line flag.
38 // This is populated from main().
39 // When run in "benchmark" mode, we have different output. This allows
40 // A/B comparisons with tools like `benchy`.
41 absl::string_view benchmarks;
42
output()43 OutputStyle output() {
44 return !benchmarks.empty() ? OutputStyle::kBenchmark : OutputStyle::kRegular;
45 }
46
47 template <class T>
48 struct Policy {
49 using slot_type = T;
50 using key_type = T;
51 using init_type = T;
52
53 template <class allocator_type, class Arg>
construct__anona2962d170111::Policy54 static void construct(allocator_type* alloc, slot_type* slot,
55 const Arg& arg) {
56 std::allocator_traits<allocator_type>::construct(*alloc, slot, arg);
57 }
58
59 template <class allocator_type>
destroy__anona2962d170111::Policy60 static void destroy(allocator_type* alloc, slot_type* slot) {
61 std::allocator_traits<allocator_type>::destroy(*alloc, slot);
62 }
63
element__anona2962d170111::Policy64 static slot_type& element(slot_type* slot) { return *slot; }
65
66 template <class F, class... Args>
apply__anona2962d170111::Policy67 static auto apply(F&& f, const slot_type& arg)
68 -> decltype(std::forward<F>(f)(arg, arg)) {
69 return std::forward<F>(f)(arg, arg);
70 }
71 };
72
GlobalBitGen()73 absl::BitGen& GlobalBitGen() {
74 static auto* value = new absl::BitGen;
75 return *value;
76 }
77
78 // Keeps a pool of allocations and randomly gives one out.
79 // This introduces more randomization to the addresses given to swisstable and
80 // should help smooth out this factor from probe length calculation.
81 template <class T>
82 class RandomizedAllocator {
83 public:
84 using value_type = T;
85
86 RandomizedAllocator() = default;
87 template <typename U>
RandomizedAllocator(RandomizedAllocator<U>)88 RandomizedAllocator(RandomizedAllocator<U>) {} // NOLINT
89
allocate(size_t n)90 static T* allocate(size_t n) {
91 auto& pointers = GetPointers(n);
92 // Fill the pool
93 while (pointers.size() < kRandomPool) {
94 pointers.push_back(std::allocator<T>{}.allocate(n));
95 }
96
97 // Choose a random one.
98 size_t i = absl::Uniform<size_t>(GlobalBitGen(), 0, pointers.size());
99 T* result = pointers[i];
100 pointers[i] = pointers.back();
101 pointers.pop_back();
102 return result;
103 }
104
deallocate(T * p,size_t n)105 static void deallocate(T* p, size_t n) {
106 // Just put it back on the pool. No need to release the memory.
107 GetPointers(n).push_back(p);
108 }
109
110 private:
111 // We keep at least kRandomPool allocations for each size.
112 static constexpr size_t kRandomPool = 20;
113
GetPointers(size_t n)114 static std::vector<T*>& GetPointers(size_t n) {
115 static auto* m = new absl::flat_hash_map<size_t, std::vector<T*>>();
116 return (*m)[n];
117 }
118 };
119
120 template <class T>
121 struct DefaultHash {
122 using type = absl::container_internal::hash_default_hash<T>;
123 };
124
125 template <class T>
126 using DefaultHashT = typename DefaultHash<T>::type;
127
128 template <class T>
129 struct Table : absl::container_internal::raw_hash_set<
130 Policy<T>, DefaultHashT<T>,
131 absl::container_internal::hash_default_eq<T>,
132 RandomizedAllocator<T>> {};
133
134 struct LoadSizes {
135 size_t min_load;
136 size_t max_load;
137 };
138
GetMinMaxLoadSizes()139 LoadSizes GetMinMaxLoadSizes() {
140 static const auto sizes = [] {
141 Table<int> t;
142
143 // First, fill enough to have a good distribution.
144 constexpr size_t kMinSize = 10000;
145 while (t.size() < kMinSize) t.insert(t.size());
146
147 const auto reach_min_load_factor = [&] {
148 const double lf = t.load_factor();
149 while (lf <= t.load_factor()) t.insert(t.size());
150 };
151
152 // Then, insert until we reach min load factor.
153 reach_min_load_factor();
154 const size_t min_load_size = t.size();
155
156 // Keep going until we hit min load factor again, then go back one.
157 t.insert(t.size());
158 reach_min_load_factor();
159
160 return LoadSizes{min_load_size, t.size() - 1};
161 }();
162 return sizes;
163 }
164
165 struct Ratios {
166 double min_load;
167 double avg_load;
168 double max_load;
169 };
170
171 // See absl/container/internal/hashtable_debug.h for details on
172 // probe length calculation.
173 template <class ElemFn>
CollectMeanProbeLengths()174 Ratios CollectMeanProbeLengths() {
175 const auto min_max_sizes = GetMinMaxLoadSizes();
176
177 ElemFn elem;
178 using Key = decltype(elem());
179 Table<Key> t;
180
181 Ratios result;
182 while (t.size() < min_max_sizes.min_load) t.insert(elem());
183 result.min_load =
184 absl::container_internal::GetHashtableDebugProbeSummary(t).mean;
185
186 while (t.size() < (min_max_sizes.min_load + min_max_sizes.max_load) / 2)
187 t.insert(elem());
188 result.avg_load =
189 absl::container_internal::GetHashtableDebugProbeSummary(t).mean;
190
191 while (t.size() < min_max_sizes.max_load) t.insert(elem());
192 result.max_load =
193 absl::container_internal::GetHashtableDebugProbeSummary(t).mean;
194
195 return result;
196 }
197
198 template <int Align>
PointerForAlignment()199 uintptr_t PointerForAlignment() {
200 alignas(Align) static constexpr uintptr_t kInitPointer = 0;
201 return reinterpret_cast<uintptr_t>(&kInitPointer);
202 }
203
204 // This incomplete type is used for testing hash of pointers of different
205 // alignments.
206 // NOTE: We are generating invalid pointer values on the fly with
207 // reinterpret_cast. There are not "safely derived" pointers so using them is
208 // technically UB. It is unlikely to be a problem, though.
209 template <int Align>
210 struct Ptr;
211
212 template <int Align>
MakePtr(uintptr_t v)213 Ptr<Align>* MakePtr(uintptr_t v) {
214 if (sizeof(v) == 8) {
215 constexpr int kCopyBits = 16;
216 // Ensure high bits are all the same.
217 v = static_cast<uintptr_t>(static_cast<intptr_t>(v << kCopyBits) >>
218 kCopyBits);
219 }
220 return reinterpret_cast<Ptr<Align>*>(v);
221 }
222
223 struct IntIdentity {
224 uint64_t i;
operator ==(IntIdentity a,IntIdentity b)225 friend bool operator==(IntIdentity a, IntIdentity b) { return a.i == b.i; }
operator ++__anona2962d170111::IntIdentity226 IntIdentity operator++(int) { return IntIdentity{i++}; }
227 };
228
229 template <int Align>
230 struct PtrIdentity {
PtrIdentity__anona2962d170111::PtrIdentity231 explicit PtrIdentity(uintptr_t val = PointerForAlignment<Align>()) : i(val) {}
232 uintptr_t i;
operator ==(PtrIdentity a,PtrIdentity b)233 friend bool operator==(PtrIdentity a, PtrIdentity b) { return a.i == b.i; }
operator ++__anona2962d170111::PtrIdentity234 PtrIdentity operator++(int) {
235 PtrIdentity p(i);
236 i += Align;
237 return p;
238 }
239 };
240
241 constexpr char kStringFormat[] = "/path/to/file/name-%07d-of-9999999.txt";
242
243 template <bool small>
244 struct String {
245 std::string value;
Make__anona2962d170111::String246 static std::string Make(uint32_t v) {
247 return {small ? absl::StrCat(v) : absl::StrFormat(kStringFormat, v)};
248 }
249 };
250
251 template <>
252 struct DefaultHash<IntIdentity> {
253 struct type {
operator ()__anona2962d170111::DefaultHash::type254 size_t operator()(IntIdentity t) const { return t.i; }
255 };
256 };
257
258 template <int Align>
259 struct DefaultHash<PtrIdentity<Align>> {
260 struct type {
operator ()__anona2962d170111::DefaultHash::type261 size_t operator()(PtrIdentity<Align> t) const { return t.i; }
262 };
263 };
264
265 template <class T>
266 struct Sequential {
operator ()__anona2962d170111::Sequential267 T operator()() const { return current++; }
268 mutable T current{};
269 };
270
271 template <int Align>
272 struct Sequential<Ptr<Align>*> {
operator ()__anona2962d170111::Sequential273 Ptr<Align>* operator()() const {
274 auto* result = MakePtr<Align>(current);
275 current += Align;
276 return result;
277 }
278 mutable uintptr_t current = PointerForAlignment<Align>();
279 };
280
281
282 template <bool small>
283 struct Sequential<String<small>> {
operator ()__anona2962d170111::Sequential284 std::string operator()() const { return String<small>::Make(current++); }
285 mutable uint32_t current = 0;
286 };
287
288 template <class T, class U>
289 struct Sequential<std::pair<T, U>> {
290 mutable Sequential<T> tseq;
291 mutable Sequential<U> useq;
292
293 using RealT = decltype(tseq());
294 using RealU = decltype(useq());
295
296 mutable std::vector<RealT> ts;
297 mutable std::vector<RealU> us;
298 mutable size_t ti = 0, ui = 0;
299
operator ()__anona2962d170111::Sequential300 std::pair<RealT, RealU> operator()() const {
301 std::pair<RealT, RealU> value{get_t(), get_u()};
302 if (ti == 0) {
303 ti = ui + 1;
304 ui = 0;
305 } else {
306 --ti;
307 ++ui;
308 }
309 return value;
310 }
311
get_t__anona2962d170111::Sequential312 RealT get_t() const {
313 while (ti >= ts.size()) ts.push_back(tseq());
314 return ts[ti];
315 }
316
get_u__anona2962d170111::Sequential317 RealU get_u() const {
318 while (ui >= us.size()) us.push_back(useq());
319 return us[ui];
320 }
321 };
322
323 template <class T, int percent_skip>
324 struct AlmostSequential {
325 mutable Sequential<T> current;
326
operator ()__anona2962d170111::AlmostSequential327 auto operator()() const -> decltype(current()) {
328 while (absl::Uniform(GlobalBitGen(), 0.0, 1.0) <= percent_skip / 100.)
329 current();
330 return current();
331 }
332 };
333
334 struct Uniform {
335 template <typename T>
operator ()__anona2962d170111::Uniform336 T operator()(T) const {
337 return absl::Uniform<T>(absl::IntervalClosed, GlobalBitGen(), T{0}, ~T{0});
338 }
339 };
340
341 struct Gaussian {
342 template <typename T>
operator ()__anona2962d170111::Gaussian343 T operator()(T) const {
344 double d;
345 do {
346 d = absl::Gaussian<double>(GlobalBitGen(), 1e6, 1e4);
347 } while (d <= 0 || d > std::numeric_limits<T>::max() / 2);
348 return static_cast<T>(d);
349 }
350 };
351
352 struct Zipf {
353 template <typename T>
operator ()__anona2962d170111::Zipf354 T operator()(T) const {
355 return absl::Zipf<T>(GlobalBitGen(), std::numeric_limits<T>::max(), 1.6);
356 }
357 };
358
359 template <class T, class Dist>
360 struct Random {
operator ()__anona2962d170111::Random361 T operator()() const { return Dist{}(T{}); }
362 };
363
364 template <class Dist, int Align>
365 struct Random<Ptr<Align>*, Dist> {
operator ()__anona2962d170111::Random366 Ptr<Align>* operator()() const {
367 return MakePtr<Align>(Random<uintptr_t, Dist>{}() * Align);
368 }
369 };
370
371 template <class Dist>
372 struct Random<IntIdentity, Dist> {
operator ()__anona2962d170111::Random373 IntIdentity operator()() const {
374 return IntIdentity{Random<uint64_t, Dist>{}()};
375 }
376 };
377
378 template <class Dist, int Align>
379 struct Random<PtrIdentity<Align>, Dist> {
operator ()__anona2962d170111::Random380 PtrIdentity<Align> operator()() const {
381 return PtrIdentity<Align>{Random<uintptr_t, Dist>{}() * Align};
382 }
383 };
384
385 template <class Dist, bool small>
386 struct Random<String<small>, Dist> {
operator ()__anona2962d170111::Random387 std::string operator()() const {
388 return String<small>::Make(Random<uint32_t, Dist>{}());
389 }
390 };
391
392 template <class T, class U, class Dist>
393 struct Random<std::pair<T, U>, Dist> {
operator ()__anona2962d170111::Random394 auto operator()() const
395 -> decltype(std::make_pair(Random<T, Dist>{}(), Random<U, Dist>{}())) {
396 return std::make_pair(Random<T, Dist>{}(), Random<U, Dist>{}());
397 }
398 };
399
400 template <typename>
401 std::string Name();
402
Name(uint32_t *)403 std::string Name(uint32_t*) { return "u32"; }
Name(uint64_t *)404 std::string Name(uint64_t*) { return "u64"; }
Name(IntIdentity *)405 std::string Name(IntIdentity*) { return "IntIdentity"; }
406
407 template <int Align>
Name(Ptr<Align> **)408 std::string Name(Ptr<Align>**) {
409 return absl::StrCat("Ptr", Align);
410 }
411
412 template <int Align>
Name(PtrIdentity<Align> *)413 std::string Name(PtrIdentity<Align>*) {
414 return absl::StrCat("PtrIdentity", Align);
415 }
416
417 template <bool small>
Name(String<small> *)418 std::string Name(String<small>*) {
419 return small ? "StrS" : "StrL";
420 }
421
422 template <class T, class U>
Name(std::pair<T,U> *)423 std::string Name(std::pair<T, U>*) {
424 if (output() == OutputStyle::kBenchmark)
425 return absl::StrCat("P_", Name<T>(), "_", Name<U>());
426 return absl::StrCat("P<", Name<T>(), ",", Name<U>(), ">");
427 }
428
429 template <class T>
Name(Sequential<T> *)430 std::string Name(Sequential<T>*) {
431 return "Sequential";
432 }
433
434 template <class T, int P>
Name(AlmostSequential<T,P> *)435 std::string Name(AlmostSequential<T, P>*) {
436 return absl::StrCat("AlmostSeq_", P);
437 }
438
439 template <class T>
Name(Random<T,Uniform> *)440 std::string Name(Random<T, Uniform>*) {
441 return "UnifRand";
442 }
443
444 template <class T>
Name(Random<T,Gaussian> *)445 std::string Name(Random<T, Gaussian>*) {
446 return "GausRand";
447 }
448
449 template <class T>
Name(Random<T,Zipf> *)450 std::string Name(Random<T, Zipf>*) {
451 return "ZipfRand";
452 }
453
454 template <typename T>
Name()455 std::string Name() {
456 return Name(static_cast<T*>(nullptr));
457 }
458
459 constexpr int kNameWidth = 15;
460 constexpr int kDistWidth = 16;
461
CanRunBenchmark(absl::string_view name)462 bool CanRunBenchmark(absl::string_view name) {
463 static std::regex* const filter = []() -> std::regex* {
464 return benchmarks.empty() || benchmarks == "all"
465 ? nullptr
466 : new std::regex(std::string(benchmarks));
467 }();
468 return filter == nullptr || std::regex_search(std::string(name), *filter);
469 }
470
471 struct Result {
472 std::string name;
473 std::string dist_name;
474 Ratios ratios;
475 };
476
477 template <typename T, typename Dist>
RunForTypeAndDistribution(std::vector<Result> & results)478 void RunForTypeAndDistribution(std::vector<Result>& results) {
479 std::string name = absl::StrCat(Name<T>(), "/", Name<Dist>());
480 // We have to check against all three names (min/avg/max) before we run it.
481 // If any of them is enabled, we run it.
482 if (!CanRunBenchmark(absl::StrCat(name, "/min")) &&
483 !CanRunBenchmark(absl::StrCat(name, "/avg")) &&
484 !CanRunBenchmark(absl::StrCat(name, "/max"))) {
485 return;
486 }
487 results.push_back({Name<T>(), Name<Dist>(), CollectMeanProbeLengths<Dist>()});
488 }
489
490 template <class T>
RunForType(std::vector<Result> & results)491 void RunForType(std::vector<Result>& results) {
492 RunForTypeAndDistribution<T, Sequential<T>>(results);
493 RunForTypeAndDistribution<T, AlmostSequential<T, 20>>(results);
494 RunForTypeAndDistribution<T, AlmostSequential<T, 50>>(results);
495 RunForTypeAndDistribution<T, Random<T, Uniform>>(results);
496 #ifdef NDEBUG
497 // Disable these in non-opt mode because they take too long.
498 RunForTypeAndDistribution<T, Random<T, Gaussian>>(results);
499 RunForTypeAndDistribution<T, Random<T, Zipf>>(results);
500 #endif // NDEBUG
501 }
502
503 } // namespace
504
main(int argc,char ** argv)505 int main(int argc, char** argv) {
506 // Parse the benchmark flags. Ignore all of them except the regex pattern.
507 for (int i = 1; i < argc; ++i) {
508 absl::string_view arg = argv[i];
509 const auto next = [&] { return argv[std::min(i + 1, argc - 1)]; };
510
511 if (absl::ConsumePrefix(&arg, "--benchmark_filter")) {
512 if (arg == "") {
513 // --benchmark_filter X
514 benchmarks = next();
515 } else if (absl::ConsumePrefix(&arg, "=")) {
516 // --benchmark_filter=X
517 benchmarks = arg;
518 }
519 }
520
521 // Any --benchmark flag turns on the mode.
522 if (absl::ConsumePrefix(&arg, "--benchmark")) {
523 if (benchmarks.empty()) benchmarks="all";
524 }
525 }
526
527 std::vector<Result> results;
528 RunForType<uint64_t>(results);
529 RunForType<IntIdentity>(results);
530 RunForType<Ptr<8>*>(results);
531 RunForType<Ptr<16>*>(results);
532 RunForType<Ptr<32>*>(results);
533 RunForType<Ptr<64>*>(results);
534 RunForType<PtrIdentity<8>>(results);
535 RunForType<PtrIdentity<16>>(results);
536 RunForType<PtrIdentity<32>>(results);
537 RunForType<PtrIdentity<64>>(results);
538 RunForType<std::pair<uint32_t, uint32_t>>(results);
539 RunForType<String<true>>(results);
540 RunForType<String<false>>(results);
541 RunForType<std::pair<uint64_t, String<true>>>(results);
542 RunForType<std::pair<String<true>, uint64_t>>(results);
543 RunForType<std::pair<uint64_t, String<false>>>(results);
544 RunForType<std::pair<String<false>, uint64_t>>(results);
545
546 switch (output()) {
547 case OutputStyle::kRegular:
548 absl::PrintF("%-*s%-*s Min Avg Max\n%s\n", kNameWidth,
549 "Type", kDistWidth, "Distribution",
550 std::string(kNameWidth + kDistWidth + 10 * 3, '-'));
551 for (const auto& result : results) {
552 absl::PrintF("%-*s%-*s %8.4f %8.4f %8.4f\n", kNameWidth, result.name,
553 kDistWidth, result.dist_name, result.ratios.min_load,
554 result.ratios.avg_load, result.ratios.max_load);
555 }
556 break;
557 case OutputStyle::kBenchmark: {
558 absl::PrintF("{\n");
559 absl::PrintF(" \"benchmarks\": [\n");
560 absl::string_view comma;
561 for (const auto& result : results) {
562 auto print = [&](absl::string_view stat, double Ratios::*val) {
563 std::string name =
564 absl::StrCat(result.name, "/", result.dist_name, "/", stat);
565 // Check the regex again. We might had have enabled only one of the
566 // stats for the benchmark.
567 if (!CanRunBenchmark(name)) return;
568 absl::PrintF(" %s{\n", comma);
569 absl::PrintF(" \"cpu_time\": %f,\n", 1e9 * result.ratios.*val);
570 absl::PrintF(" \"real_time\": %f,\n", 1e9 * result.ratios.*val);
571 absl::PrintF(" \"iterations\": 1,\n");
572 absl::PrintF(" \"name\": \"%s\",\n", name);
573 absl::PrintF(" \"time_unit\": \"ns\"\n");
574 absl::PrintF(" }\n");
575 comma = ",";
576 };
577 print("min", &Ratios::min_load);
578 print("avg", &Ratios::avg_load);
579 print("max", &Ratios::max_load);
580 }
581 absl::PrintF(" ],\n");
582 absl::PrintF(" \"context\": {\n");
583 absl::PrintF(" }\n");
584 absl::PrintF("}\n");
585 break;
586 }
587 }
588
589 return 0;
590 }
591