xref: /aosp_15_r20/external/perfetto/src/base/flat_hash_map_benchmark.cc (revision 6dbdd20afdafa5e3ca9b8809fa73465d530080dc)
1 // Copyright (C) 2019 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <functional>
16 #include <random>
17 #include <unordered_map>
18 
19 #include <benchmark/benchmark.h>
20 #include <stdio.h>
21 #include <sys/mman.h>
22 #include <unistd.h>
23 
24 #include "perfetto/base/logging.h"
25 #include "perfetto/ext/base/file_utils.h"
26 #include "perfetto/ext/base/flat_hash_map.h"
27 #include "perfetto/ext/base/hash.h"
28 #include "perfetto/ext/base/scoped_file.h"
29 #include "perfetto/ext/base/string_view.h"
30 
31 // This benchmark allows to compare our FlatHashMap implementation against
32 // reference implementations from Absl (Google), Folly F14 (FB), and Tssil's
33 // reference RobinHood hashmap.
34 // Those libraries are not checked in into the repo. If you want to reproduce
35 // the benchmark you need to:
36 // - Manually install the three libraries using following the instructions in
37 //   their readme (they all use cmake).
38 // - When running cmake, remember to pass
39 //   -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS='-DNDEBUG -O3 -msse4.2 -mavx'.
40 //   That sets cflags for a more fair comparison.
41 // - Set is_debug=false in the GN args.
42 // - Set the GN var perfetto_benchmark_3p_libs_prefix="/usr/local" (or whatever
43 //   other directory you set as DESTDIR when running make install).
44 // The presence of the perfetto_benchmark_3p_libs_prefix GN variable will
45 // automatically define PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS.
46 
47 #if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
48 
49 // Last tested: https://github.com/abseil/abseil-cpp @ f2dbd918d.
50 #include <absl/container/flat_hash_map.h>
51 
52 // Last tested: https://github.com/facebook/folly @ 028a9abae3.
53 #include <folly/container/F14Map.h>
54 
55 // Last tested: https://github.com/Tessil/robin-map @ a603419b9.
56 #include <tsl/robin_map.h>
57 #endif
58 
59 namespace {
60 
61 using namespace perfetto;
62 using benchmark::Counter;
63 using perfetto::base::AlreadyHashed;
64 using perfetto::base::LinearProbe;
65 using perfetto::base::QuadraticHalfProbe;
66 using perfetto::base::QuadraticProbe;
67 
68 // Our FlatHashMap doesn't have a STL-like interface, mainly because we use
69 // columnar-oriented storage, not array-of-tuples, so we can't easily map into
70 // that interface. This wrapper makes our FlatHashMap compatible with STL (just
71 // for what it takes to build this translation unit), at the cost of some small
72 // performance penalty (around 1-2%).
73 template <typename Key, typename Value, typename Hasher, typename Probe>
74 class Ours : public base::FlatHashMap<Key, Value, Hasher, Probe> {
75  public:
76   struct Iterator {
77     using value_type = std::pair<const Key&, Value&>;
Iterator__anon4292ef250111::Ours::Iterator78     Iterator(const Key& k, Value& v) : pair_{k, v} {}
operator ->__anon4292ef250111::Ours::Iterator79     value_type* operator->() { return &pair_; }
operator *__anon4292ef250111::Ours::Iterator80     value_type& operator*() { return pair_; }
operator ==__anon4292ef250111::Ours::Iterator81     bool operator==(const Iterator& other) const {
82       return &pair_.first == &other.pair_.first;
83     }
operator !=__anon4292ef250111::Ours::Iterator84     bool operator!=(const Iterator& other) const { return !operator==(other); }
85     value_type pair_;
86   };
87 
insert(std::pair<Key,Value> && pair)88   void insert(std::pair<Key, Value>&& pair) {
89     this->Insert(std::move(pair.first), std::move(pair.second));
90   }
91 
find(const Key & key)92   Iterator find(const Key& key) {
93     const size_t idx = this->FindInternal(key);
94     return Iterator(this->keys_[idx], this->values_[idx]);
95   }
96 
end()97   Iterator end() {
98     return Iterator(this->keys_[this->kNotFound],
99                     this->values_[this->kNotFound]);
100   }
101 };
102 
LoadTraceStrings(benchmark::State & state)103 std::vector<uint64_t> LoadTraceStrings(benchmark::State& state) {
104   std::vector<uint64_t> str_hashes;
105   // This requires that the user has downloaded the file
106   // go/perfetto-benchmark-trace-strings into /tmp/trace_strings. The file is
107   // too big (2.3 GB after uncompression) and it's not worth adding it to the
108   // //test/data. Also it contains data from a team member's phone and cannot
109   // be public.
110   base::ScopedFstream f(fopen("/tmp/trace_strings", "re"));
111   if (!f) {
112     state.SkipWithError(
113         "Test strings missing. Googlers: download "
114         "go/perfetto-benchmark-trace-strings and save into /tmp/trace_strings");
115     return str_hashes;
116   }
117   char line[4096];
118   while (fgets(line, sizeof(line), *f)) {
119     base::Hasher hasher;
120     hasher.Update(line, strlen(line));
121     str_hashes.emplace_back(hasher.digest());
122   }
123   return str_hashes;
124 }
125 
IsBenchmarkFunctionalOnly()126 bool IsBenchmarkFunctionalOnly() {
127   return getenv("BENCHMARK_FUNCTIONAL_TEST_ONLY") != nullptr;
128 }
129 
num_samples()130 size_t num_samples() {
131   return IsBenchmarkFunctionalOnly() ? size_t(100) : size_t(10 * 1000 * 1000);
132 }
133 
134 // Uses directly the base::FlatHashMap with no STL wrapper. Configures the map
135 // in append-only mode.
BM_HashMap_InsertTraceStrings_AppendOnly(benchmark::State & state)136 void BM_HashMap_InsertTraceStrings_AppendOnly(benchmark::State& state) {
137   std::vector<uint64_t> hashes = LoadTraceStrings(state);
138   for (auto _ : state) {
139     base::FlatHashMap<uint64_t, uint64_t, AlreadyHashed<uint64_t>, LinearProbe,
140                       /*AppendOnly=*/true>
141         mapz;
142     for (uint64_t hash : hashes)
143       mapz.Insert(hash, 42);
144 
145     benchmark::ClobberMemory();
146     state.counters["uniq"] = Counter(static_cast<double>(mapz.size()));
147   }
148   state.counters["tot"] = Counter(static_cast<double>(hashes.size()),
149                                   Counter::kIsIterationInvariant);
150   state.counters["rate"] = Counter(static_cast<double>(hashes.size()),
151                                    Counter::kIsIterationInvariantRate);
152 }
153 
154 template <typename MapType>
BM_HashMap_InsertTraceStrings(benchmark::State & state)155 void BM_HashMap_InsertTraceStrings(benchmark::State& state) {
156   std::vector<uint64_t> hashes = LoadTraceStrings(state);
157   for (auto _ : state) {
158     MapType mapz;
159     for (uint64_t hash : hashes)
160       mapz.insert({hash, 42});
161 
162     benchmark::ClobberMemory();
163     state.counters["uniq"] = Counter(static_cast<double>(mapz.size()));
164   }
165   state.counters["tot"] = Counter(static_cast<double>(hashes.size()),
166                                   Counter::kIsIterationInvariant);
167   state.counters["rate"] = Counter(static_cast<double>(hashes.size()),
168                                    Counter::kIsIterationInvariantRate);
169 }
170 
171 template <typename MapType>
BM_HashMap_TraceTids(benchmark::State & state)172 void BM_HashMap_TraceTids(benchmark::State& state) {
173   std::vector<std::pair<char, int>> ops_and_tids;
174   {
175     base::ScopedFstream f(fopen("/tmp/tids", "re"));
176     if (!f) {
177       // This test requires a large (800MB) test file. It's not checked into the
178       // repository's //test/data because it would slow down all developers for
179       // a marginal benefit.
180       state.SkipWithError(
181           "Please run `curl -Lo /tmp/tids "
182           "https://storage.googleapis.com/perfetto/test_datalong_trace_tids.txt"
183           "` and try again.");
184       return;
185     }
186     char op;
187     int tid;
188     while (fscanf(*f, "%c %d\n", &op, &tid) == 2)
189       ops_and_tids.emplace_back(op, tid);
190   }
191 
192   for (auto _ : state) {
193     MapType mapz;
194     for (const auto& ops_and_tid : ops_and_tids) {
195       if (ops_and_tid.first == '[') {
196         mapz[ops_and_tid.second]++;
197       } else {
198         mapz.insert({ops_and_tid.second, 0});
199       }
200     }
201 
202     benchmark::ClobberMemory();
203     state.counters["uniq"] = Counter(static_cast<double>(mapz.size()));
204   }
205   state.counters["rate"] = Counter(static_cast<double>(ops_and_tids.size()),
206                                    Counter::kIsIterationInvariantRate);
207 }
208 
209 template <typename MapType>
BM_HashMap_InsertRandInts(benchmark::State & state)210 void BM_HashMap_InsertRandInts(benchmark::State& state) {
211   std::minstd_rand0 rng(0);
212   std::vector<size_t> keys(static_cast<size_t>(num_samples()));
213   std::shuffle(keys.begin(), keys.end(), rng);
214   for (auto _ : state) {
215     MapType mapz;
216     for (const auto key : keys)
217       mapz.insert({key, key});
218     benchmark::DoNotOptimize(mapz);
219     benchmark::ClobberMemory();
220   }
221   state.counters["insertions"] = Counter(static_cast<double>(keys.size()),
222                                          Counter::kIsIterationInvariantRate);
223 }
224 
225 // This test is performs insertions on integers that are designed to create
226 // lot of clustering on the same small set of buckets.
227 // This covers the unlucky case of using a map with a poor hashing function.
228 template <typename MapType>
BM_HashMap_InsertCollidingInts(benchmark::State & state)229 void BM_HashMap_InsertCollidingInts(benchmark::State& state) {
230   std::vector<size_t> keys;
231   const size_t kNumSamples = num_samples();
232 
233   // Generates numbers that are all distinct from each other, but that are
234   // designed to collide on the same buckets.
235   constexpr size_t kShift = 8;  // Collide on the same 2^8 = 256 buckets.
236   for (size_t i = 0; i < kNumSamples; i++) {
237     size_t bucket = i & ((1 << kShift) - 1);  // [0, 256].
238     size_t multiplier = i >> kShift;          // 0,0,0... 1,1,1..., 2,2,2...
239     size_t key = 8192 * multiplier + bucket;
240     keys.push_back(key);
241   }
242   for (auto _ : state) {
243     MapType mapz;
244     for (const size_t key : keys)
245       mapz.insert({key, key});
246     benchmark::DoNotOptimize(mapz);
247     benchmark::ClobberMemory();
248   }
249   state.counters["insertions"] = Counter(static_cast<double>(keys.size()),
250                                          Counter::kIsIterationInvariantRate);
251 }
252 
253 // Unlike the previous benchmark, here integers don't just collide on the same
254 // buckets, they have a large number of duplicates with the same values.
255 // Most of those insertions are no-ops. This tests the ability of the hashmap
256 // to deal with cases where the hash function is good but the insertions contain
257 // lot of dupes (e.g. dealing with pids).
258 template <typename MapType>
BM_HashMap_InsertDupeInts(benchmark::State & state)259 void BM_HashMap_InsertDupeInts(benchmark::State& state) {
260   std::vector<size_t> keys;
261   const size_t kNumSamples = num_samples();
262 
263   for (size_t i = 0; i < kNumSamples; i++)
264     keys.push_back(i % 16384);
265 
266   for (auto _ : state) {
267     MapType mapz;
268     for (const size_t key : keys)
269       mapz.insert({key, key});
270     benchmark::DoNotOptimize(mapz);
271     benchmark::ClobberMemory();
272   }
273   state.counters["insertions"] = Counter(static_cast<double>(keys.size()),
274                                          Counter::kIsIterationInvariantRate);
275 }
276 
277 template <typename MapType>
BM_HashMap_LookupRandInts(benchmark::State & state)278 void BM_HashMap_LookupRandInts(benchmark::State& state) {
279   std::minstd_rand0 rng(0);
280   std::vector<size_t> keys(static_cast<size_t>(num_samples()));
281   std::shuffle(keys.begin(), keys.end(), rng);
282 
283   MapType mapz;
284   for (const size_t key : keys)
285     mapz.insert({key, key});
286 
287   for (auto _ : state) {
288     int64_t total = 0;
289     for (const size_t key : keys) {
290       auto it = mapz.find(static_cast<uint64_t>(key));
291       PERFETTO_CHECK(it != mapz.end());
292       total += it->second;
293     }
294     benchmark::DoNotOptimize(total);
295     benchmark::ClobberMemory();
296     state.counters["sum"] = Counter(static_cast<double>(total));
297   }
298   state.counters["lookups"] = Counter(static_cast<double>(keys.size()),
299                                       Counter::kIsIterationInvariantRate);
300 }
301 
302 }  // namespace
303 
304 using Ours_LinearProbing =
305     Ours<uint64_t, uint64_t, AlreadyHashed<uint64_t>, LinearProbe>;
306 using Ours_QuadProbing =
307     Ours<uint64_t, uint64_t, AlreadyHashed<uint64_t>, QuadraticProbe>;
308 using Ours_QuadCompProbing =
309     Ours<uint64_t, uint64_t, AlreadyHashed<uint64_t>, QuadraticHalfProbe>;
310 using StdUnorderedMap =
311     std::unordered_map<uint64_t, uint64_t, AlreadyHashed<uint64_t>>;
312 
313 #if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
314 using RobinMap = tsl::robin_map<uint64_t, uint64_t, AlreadyHashed<uint64_t>>;
315 using AbslFlatHashMap =
316     absl::flat_hash_map<uint64_t, uint64_t, AlreadyHashed<uint64_t>>;
317 using FollyF14FastMap =
318     folly::F14FastMap<uint64_t, uint64_t, AlreadyHashed<uint64_t>>;
319 #endif
320 
321 BENCHMARK(BM_HashMap_InsertTraceStrings_AppendOnly);
322 BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, Ours_LinearProbing);
323 BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, Ours_QuadProbing);
324 BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, StdUnorderedMap);
325 #if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
326 BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, RobinMap);
327 BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, AbslFlatHashMap);
328 BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, FollyF14FastMap);
329 #endif
330 
331 #define TID_ARGS int, uint64_t, std::hash<int>
332 BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, Ours<TID_ARGS, LinearProbe>);
333 BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, Ours<TID_ARGS, QuadraticProbe>);
334 BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, Ours<TID_ARGS, QuadraticHalfProbe>);
335 BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, std::unordered_map<TID_ARGS>);
336 #if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
337 BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, tsl::robin_map<TID_ARGS>);
338 BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, absl::flat_hash_map<TID_ARGS>);
339 BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, folly::F14FastMap<TID_ARGS>);
340 #endif
341 
342 BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, Ours_LinearProbing);
343 BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, Ours_QuadProbing);
344 BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, StdUnorderedMap);
345 #if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
346 BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, RobinMap);
347 BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, AbslFlatHashMap);
348 BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, FollyF14FastMap);
349 #endif
350 
351 BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, Ours_LinearProbing);
352 BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, Ours_QuadProbing);
353 BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, Ours_QuadCompProbing);
354 BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, StdUnorderedMap);
355 #if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
356 BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, RobinMap);
357 BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, AbslFlatHashMap);
358 BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, FollyF14FastMap);
359 #endif
360 
361 BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, Ours_LinearProbing);
362 BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, Ours_QuadProbing);
363 BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, Ours_QuadCompProbing);
364 BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, StdUnorderedMap);
365 #if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
366 BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, RobinMap);
367 BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, AbslFlatHashMap);
368 BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, FollyF14FastMap);
369 #endif
370 
371 BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, Ours_LinearProbing);
372 BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, Ours_QuadProbing);
373 BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, StdUnorderedMap);
374 #if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
375 BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, RobinMap);
376 BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, AbslFlatHashMap);
377 BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, FollyF14FastMap);
378 #endif
379