1 // Copyright (C) 2019 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

#include <algorithm>
#include <functional>
#include <numeric>
#include <random>
#include <unordered_map>

#include <benchmark/benchmark.h>

#include "perfetto/base/logging.h"
#include "perfetto/ext/base/file_utils.h"
#include "perfetto/ext/base/flat_hash_map.h"
#include "perfetto/ext/base/hash.h"
#include "perfetto/ext/base/scoped_file.h"
#include "perfetto/ext/base/string_view.h"
30
// This benchmark allows comparing our FlatHashMap implementation against
// reference implementations from Absl (Google), Folly F14 (FB), and Tessil's
// reference RobinHood hashmap.
34 // Those libraries are not checked in into the repo. If you want to reproduce
35 // the benchmark you need to:
// - Manually install the three libraries following the instructions in
//   their readme (they all use cmake).
38 // - When running cmake, remember to pass
39 // -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS='-DNDEBUG -O3 -msse4.2 -mavx'.
40 // That sets cflags for a more fair comparison.
41 // - Set is_debug=false in the GN args.
42 // - Set the GN var perfetto_benchmark_3p_libs_prefix="/usr/local" (or whatever
43 // other directory you set as DESTDIR when running make install).
44 // The presence of the perfetto_benchmark_3p_libs_prefix GN variable will
45 // automatically define PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS.
46
47 #if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
48
49 // Last tested: https://github.com/abseil/abseil-cpp @ f2dbd918d.
50 #include <absl/container/flat_hash_map.h>
51
52 // Last tested: https://github.com/facebook/folly @ 028a9abae3.
53 #include <folly/container/F14Map.h>
54
55 // Last tested: https://github.com/Tessil/robin-map @ a603419b9.
56 #include <tsl/robin_map.h>
57 #endif
58
59 namespace {
60
61 using namespace perfetto;
62 using benchmark::Counter;
63 using perfetto::base::AlreadyHashed;
64 using perfetto::base::LinearProbe;
65 using perfetto::base::QuadraticHalfProbe;
66 using perfetto::base::QuadraticProbe;
67
68 // Our FlatHashMap doesn't have a STL-like interface, mainly because we use
69 // columnar-oriented storage, not array-of-tuples, so we can't easily map into
70 // that interface. This wrapper makes our FlatHashMap compatible with STL (just
71 // for what it takes to build this translation unit), at the cost of some small
72 // performance penalty (around 1-2%).
73 template <typename Key, typename Value, typename Hasher, typename Probe>
74 class Ours : public base::FlatHashMap<Key, Value, Hasher, Probe> {
75 public:
76 struct Iterator {
77 using value_type = std::pair<const Key&, Value&>;
Iterator__anon4292ef250111::Ours::Iterator78 Iterator(const Key& k, Value& v) : pair_{k, v} {}
operator ->__anon4292ef250111::Ours::Iterator79 value_type* operator->() { return &pair_; }
operator *__anon4292ef250111::Ours::Iterator80 value_type& operator*() { return pair_; }
operator ==__anon4292ef250111::Ours::Iterator81 bool operator==(const Iterator& other) const {
82 return &pair_.first == &other.pair_.first;
83 }
operator !=__anon4292ef250111::Ours::Iterator84 bool operator!=(const Iterator& other) const { return !operator==(other); }
85 value_type pair_;
86 };
87
insert(std::pair<Key,Value> && pair)88 void insert(std::pair<Key, Value>&& pair) {
89 this->Insert(std::move(pair.first), std::move(pair.second));
90 }
91
find(const Key & key)92 Iterator find(const Key& key) {
93 const size_t idx = this->FindInternal(key);
94 return Iterator(this->keys_[idx], this->values_[idx]);
95 }
96
end()97 Iterator end() {
98 return Iterator(this->keys_[this->kNotFound],
99 this->values_[this->kNotFound]);
100 }
101 };
102
LoadTraceStrings(benchmark::State & state)103 std::vector<uint64_t> LoadTraceStrings(benchmark::State& state) {
104 std::vector<uint64_t> str_hashes;
105 // This requires that the user has downloaded the file
106 // go/perfetto-benchmark-trace-strings into /tmp/trace_strings. The file is
107 // too big (2.3 GB after uncompression) and it's not worth adding it to the
108 // //test/data. Also it contains data from a team member's phone and cannot
109 // be public.
110 base::ScopedFstream f(fopen("/tmp/trace_strings", "re"));
111 if (!f) {
112 state.SkipWithError(
113 "Test strings missing. Googlers: download "
114 "go/perfetto-benchmark-trace-strings and save into /tmp/trace_strings");
115 return str_hashes;
116 }
117 char line[4096];
118 while (fgets(line, sizeof(line), *f)) {
119 base::Hasher hasher;
120 hasher.Update(line, strlen(line));
121 str_hashes.emplace_back(hasher.digest());
122 }
123 return str_hashes;
124 }
125
// True when the run only needs to exercise the code paths (e.g. as a smoke
// test), in which case the synthetic workloads are shrunk drastically.
bool IsBenchmarkFunctionalOnly() {
  const char* env_value = getenv("BENCHMARK_FUNCTIONAL_TEST_ONLY");
  return env_value != nullptr;
}
129
num_samples()130 size_t num_samples() {
131 return IsBenchmarkFunctionalOnly() ? size_t(100) : size_t(10 * 1000 * 1000);
132 }
133
134 // Uses directly the base::FlatHashMap with no STL wrapper. Configures the map
135 // in append-only mode.
BM_HashMap_InsertTraceStrings_AppendOnly(benchmark::State & state)136 void BM_HashMap_InsertTraceStrings_AppendOnly(benchmark::State& state) {
137 std::vector<uint64_t> hashes = LoadTraceStrings(state);
138 for (auto _ : state) {
139 base::FlatHashMap<uint64_t, uint64_t, AlreadyHashed<uint64_t>, LinearProbe,
140 /*AppendOnly=*/true>
141 mapz;
142 for (uint64_t hash : hashes)
143 mapz.Insert(hash, 42);
144
145 benchmark::ClobberMemory();
146 state.counters["uniq"] = Counter(static_cast<double>(mapz.size()));
147 }
148 state.counters["tot"] = Counter(static_cast<double>(hashes.size()),
149 Counter::kIsIterationInvariant);
150 state.counters["rate"] = Counter(static_cast<double>(hashes.size()),
151 Counter::kIsIterationInvariantRate);
152 }
153
154 template <typename MapType>
BM_HashMap_InsertTraceStrings(benchmark::State & state)155 void BM_HashMap_InsertTraceStrings(benchmark::State& state) {
156 std::vector<uint64_t> hashes = LoadTraceStrings(state);
157 for (auto _ : state) {
158 MapType mapz;
159 for (uint64_t hash : hashes)
160 mapz.insert({hash, 42});
161
162 benchmark::ClobberMemory();
163 state.counters["uniq"] = Counter(static_cast<double>(mapz.size()));
164 }
165 state.counters["tot"] = Counter(static_cast<double>(hashes.size()),
166 Counter::kIsIterationInvariant);
167 state.counters["rate"] = Counter(static_cast<double>(hashes.size()),
168 Counter::kIsIterationInvariantRate);
169 }
170
171 template <typename MapType>
BM_HashMap_TraceTids(benchmark::State & state)172 void BM_HashMap_TraceTids(benchmark::State& state) {
173 std::vector<std::pair<char, int>> ops_and_tids;
174 {
175 base::ScopedFstream f(fopen("/tmp/tids", "re"));
176 if (!f) {
177 // This test requires a large (800MB) test file. It's not checked into the
178 // repository's //test/data because it would slow down all developers for
179 // a marginal benefit.
180 state.SkipWithError(
181 "Please run `curl -Lo /tmp/tids "
182 "https://storage.googleapis.com/perfetto/test_datalong_trace_tids.txt"
183 "` and try again.");
184 return;
185 }
186 char op;
187 int tid;
188 while (fscanf(*f, "%c %d\n", &op, &tid) == 2)
189 ops_and_tids.emplace_back(op, tid);
190 }
191
192 for (auto _ : state) {
193 MapType mapz;
194 for (const auto& ops_and_tid : ops_and_tids) {
195 if (ops_and_tid.first == '[') {
196 mapz[ops_and_tid.second]++;
197 } else {
198 mapz.insert({ops_and_tid.second, 0});
199 }
200 }
201
202 benchmark::ClobberMemory();
203 state.counters["uniq"] = Counter(static_cast<double>(mapz.size()));
204 }
205 state.counters["rate"] = Counter(static_cast<double>(ops_and_tids.size()),
206 Counter::kIsIterationInvariantRate);
207 }
208
209 template <typename MapType>
BM_HashMap_InsertRandInts(benchmark::State & state)210 void BM_HashMap_InsertRandInts(benchmark::State& state) {
211 std::minstd_rand0 rng(0);
212 std::vector<size_t> keys(static_cast<size_t>(num_samples()));
213 std::shuffle(keys.begin(), keys.end(), rng);
214 for (auto _ : state) {
215 MapType mapz;
216 for (const auto key : keys)
217 mapz.insert({key, key});
218 benchmark::DoNotOptimize(mapz);
219 benchmark::ClobberMemory();
220 }
221 state.counters["insertions"] = Counter(static_cast<double>(keys.size()),
222 Counter::kIsIterationInvariantRate);
223 }
224
225 // This test is performs insertions on integers that are designed to create
226 // lot of clustering on the same small set of buckets.
227 // This covers the unlucky case of using a map with a poor hashing function.
228 template <typename MapType>
BM_HashMap_InsertCollidingInts(benchmark::State & state)229 void BM_HashMap_InsertCollidingInts(benchmark::State& state) {
230 std::vector<size_t> keys;
231 const size_t kNumSamples = num_samples();
232
233 // Generates numbers that are all distinct from each other, but that are
234 // designed to collide on the same buckets.
235 constexpr size_t kShift = 8; // Collide on the same 2^8 = 256 buckets.
236 for (size_t i = 0; i < kNumSamples; i++) {
237 size_t bucket = i & ((1 << kShift) - 1); // [0, 256].
238 size_t multiplier = i >> kShift; // 0,0,0... 1,1,1..., 2,2,2...
239 size_t key = 8192 * multiplier + bucket;
240 keys.push_back(key);
241 }
242 for (auto _ : state) {
243 MapType mapz;
244 for (const size_t key : keys)
245 mapz.insert({key, key});
246 benchmark::DoNotOptimize(mapz);
247 benchmark::ClobberMemory();
248 }
249 state.counters["insertions"] = Counter(static_cast<double>(keys.size()),
250 Counter::kIsIterationInvariantRate);
251 }
252
253 // Unlike the previous benchmark, here integers don't just collide on the same
254 // buckets, they have a large number of duplicates with the same values.
255 // Most of those insertions are no-ops. This tests the ability of the hashmap
256 // to deal with cases where the hash function is good but the insertions contain
257 // lot of dupes (e.g. dealing with pids).
258 template <typename MapType>
BM_HashMap_InsertDupeInts(benchmark::State & state)259 void BM_HashMap_InsertDupeInts(benchmark::State& state) {
260 std::vector<size_t> keys;
261 const size_t kNumSamples = num_samples();
262
263 for (size_t i = 0; i < kNumSamples; i++)
264 keys.push_back(i % 16384);
265
266 for (auto _ : state) {
267 MapType mapz;
268 for (const size_t key : keys)
269 mapz.insert({key, key});
270 benchmark::DoNotOptimize(mapz);
271 benchmark::ClobberMemory();
272 }
273 state.counters["insertions"] = Counter(static_cast<double>(keys.size()),
274 Counter::kIsIterationInvariantRate);
275 }
276
277 template <typename MapType>
BM_HashMap_LookupRandInts(benchmark::State & state)278 void BM_HashMap_LookupRandInts(benchmark::State& state) {
279 std::minstd_rand0 rng(0);
280 std::vector<size_t> keys(static_cast<size_t>(num_samples()));
281 std::shuffle(keys.begin(), keys.end(), rng);
282
283 MapType mapz;
284 for (const size_t key : keys)
285 mapz.insert({key, key});
286
287 for (auto _ : state) {
288 int64_t total = 0;
289 for (const size_t key : keys) {
290 auto it = mapz.find(static_cast<uint64_t>(key));
291 PERFETTO_CHECK(it != mapz.end());
292 total += it->second;
293 }
294 benchmark::DoNotOptimize(total);
295 benchmark::ClobberMemory();
296 state.counters["sum"] = Counter(static_cast<double>(total));
297 }
298 state.counters["lookups"] = Counter(static_cast<double>(keys.size()),
299 Counter::kIsIterationInvariantRate);
300 }
301
302 } // namespace
303
// Concrete map types under test, one per probing scheme. AlreadyHashed is a
// pass-through hasher: the benchmark keys are pre-hashed 64-bit values.
using Ours_LinearProbing =
    Ours<uint64_t, uint64_t, AlreadyHashed<uint64_t>, LinearProbe>;
using Ours_QuadProbing =
    Ours<uint64_t, uint64_t, AlreadyHashed<uint64_t>, QuadraticProbe>;
using Ours_QuadCompProbing =
    Ours<uint64_t, uint64_t, AlreadyHashed<uint64_t>, QuadraticHalfProbe>;
// Baseline: std::unordered_map with the same pass-through hasher.
using StdUnorderedMap =
    std::unordered_map<uint64_t, uint64_t, AlreadyHashed<uint64_t>>;

#if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
// Third-party reference implementations; see the comment at the top of the
// file for how to enable them.
using RobinMap = tsl::robin_map<uint64_t, uint64_t, AlreadyHashed<uint64_t>>;
using AbslFlatHashMap =
    absl::flat_hash_map<uint64_t, uint64_t, AlreadyHashed<uint64_t>>;
using FollyF14FastMap =
    folly::F14FastMap<uint64_t, uint64_t, AlreadyHashed<uint64_t>>;
#endif
320
// Trace-string insertion benchmarks (require /tmp/trace_strings, see
// LoadTraceStrings()).
BENCHMARK(BM_HashMap_InsertTraceStrings_AppendOnly);
BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, Ours_LinearProbing);
BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, Ours_QuadProbing);
BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, StdUnorderedMap);
#if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, RobinMap);
BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, AbslFlatHashMap);
BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, FollyF14FastMap);
#endif

// Tid benchmarks (require /tmp/tids, see BM_HashMap_TraceTids()). These use
// int keys with std::hash rather than pre-hashed keys.
#define TID_ARGS int, uint64_t, std::hash<int>
BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, Ours<TID_ARGS, LinearProbe>);
BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, Ours<TID_ARGS, QuadraticProbe>);
BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, Ours<TID_ARGS, QuadraticHalfProbe>);
BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, std::unordered_map<TID_ARGS>);
#if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, tsl::robin_map<TID_ARGS>);
BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, absl::flat_hash_map<TID_ARGS>);
BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, folly::F14FastMap<TID_ARGS>);
#endif

// Synthetic integer workloads (self-contained, no external files).
BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, Ours_LinearProbing);
BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, Ours_QuadProbing);
BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, StdUnorderedMap);
#if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, RobinMap);
BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, AbslFlatHashMap);
BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, FollyF14FastMap);
#endif

BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, Ours_LinearProbing);
BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, Ours_QuadProbing);
BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, Ours_QuadCompProbing);
BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, StdUnorderedMap);
#if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, RobinMap);
BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, AbslFlatHashMap);
BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, FollyF14FastMap);
#endif

BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, Ours_LinearProbing);
BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, Ours_QuadProbing);
BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, Ours_QuadCompProbing);
BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, StdUnorderedMap);
#if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, RobinMap);
BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, AbslFlatHashMap);
BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, FollyF14FastMap);
#endif

BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, Ours_LinearProbing);
BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, Ours_QuadProbing);
BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, StdUnorderedMap);
#if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, RobinMap);
BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, AbslFlatHashMap);
BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, FollyF14FastMap);
#endif
379