1 // Copyright 2019 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <algorithm>
6 #include <atomic>
7 #include <limits>
8 #include <memory>
9 #include <vector>
10
11 #include "base/debug/debugging_buildflags.h"
12 #include "base/timer/lap_timer.h"
13 #include "build/build_config.h"
14 #include "partition_alloc/extended_api.h"
15 #include "partition_alloc/partition_alloc.h"
16 #include "partition_alloc/partition_alloc_base/logging.h"
17 #include "partition_alloc/partition_alloc_base/strings/stringprintf.h"
18 #include "partition_alloc/partition_alloc_base/threading/platform_thread_for_testing.h"
19 #include "partition_alloc/partition_alloc_base/time/time.h"
20 #include "partition_alloc/partition_alloc_check.h"
21 #include "partition_alloc/partition_alloc_constants.h"
22 #include "partition_alloc/partition_alloc_for_testing.h"
23 #include "partition_alloc/partition_root.h"
24 #include "partition_alloc/thread_cache.h"
25 #include "testing/gtest/include/gtest/gtest.h"
26 #include "testing/perf/perf_result_reporter.h"
27
28 #if BUILDFLAG(IS_ANDROID) || defined(ARCH_CPU_32_BITS) || BUILDFLAG(IS_FUCHSIA)
29 // Some tests allocate many GB of memory, which can cause issues on Android and
30 // address-space exhaustion for any 32-bit process.
31 #define MEMORY_CONSTRAINED
32 #endif
33
34 #if BUILDFLAG(ENABLE_ALLOCATION_STACK_TRACE_RECORDER)
35 #include "base/allocator/dispatcher/dispatcher.h"
36 #include "base/debug/allocation_trace.h"
37 #endif
38
39 namespace partition_alloc::internal {
40
41 namespace {
42
// Change kTimeLimit to something higher if you need more time to capture a
// trace.
constexpr ::base::TimeDelta kTimeLimit = ::base::Seconds(2);
// Laps executed before timing starts, and laps between time-limit checks.
constexpr int kWarmupRuns = 10000;
constexpr int kTimeCheckInterval = 100000;
// Allocation size used by the single-bucket tests.
constexpr size_t kAllocSize = 40;

// Size constants are mostly arbitrary, but try to simulate something like CSS
// parsing which consists of lots of relatively small objects.
constexpr int kMultiBucketMinimumSize = 24;
constexpr int kMultiBucketIncrement = 13;
// Sizes are 24 + i * 13 for i in [0, kMultiBucketRounds); the largest is
// 24 + 13 * 21 = 297 bytes.
constexpr int kMultiBucketRounds = 22;

// Metric names reported through perf_test::PerfResultReporter.
constexpr char kMetricPrefixMemoryAllocation[] = "MemoryAllocation.";
constexpr char kMetricThroughput[] = "throughput";
constexpr char kMetricTimePerAllocation[] = "time_per_allocation";
60
SetUpReporter(const std::string & story_name)61 perf_test::PerfResultReporter SetUpReporter(const std::string& story_name) {
62 perf_test::PerfResultReporter reporter(kMetricPrefixMemoryAllocation,
63 story_name);
64 reporter.RegisterImportantMetric(kMetricThroughput, "runs/s");
65 reporter.RegisterImportantMetric(kMetricTimePerAllocation, "ns");
66 return reporter;
67 }
68
// Allocator implementations compared by this benchmark. The enum value is
// logged as an integer by LogResults(), so keep the ordering stable.
enum class AllocatorType {
  kSystem,
  kPartitionAlloc,
  kPartitionAllocWithThreadCache,
#if BUILDFLAG(ENABLE_ALLOCATION_STACK_TRACE_RECORDER)
  kPartitionAllocWithAllocationStackTraceRecorder,
#endif
};
77
// Minimal allocation interface implemented by each allocator under test, so
// the benchmark loops can be written once and run against all of them.
class Allocator {
 public:
  Allocator() = default;
  virtual ~Allocator() = default;
  virtual void* Alloc(size_t size) = 0;
  virtual void Free(void* data) = 0;
};
85
// Baseline: the system allocator (malloc/free).
class SystemAllocator : public Allocator {
 public:
  SystemAllocator() = default;
  ~SystemAllocator() override = default;
  void* Alloc(size_t size) override { return malloc(size); }
  void Free(void* data) override { free(data); }
};
93
// A dedicated PartitionRoot with default options (in particular, no thread
// cache unless the build forces one).
class PartitionAllocator : public Allocator {
 public:
  PartitionAllocator() = default;
  ~PartitionAllocator() override { alloc_.DestructForTesting(); }

  void* Alloc(size_t size) override {
    return alloc_.AllocInline<AllocFlags::kNoHooks>(size);
  }
  void Free(void* data) override {
    // Even though it's easy to invoke the fast path with
    // alloc_.Free<kNoHooks>(), we chose to use the slower path, because it's
    // more common with PA-E.
    PartitionRoot::FreeInlineInUnknownRoot<
        partition_alloc::FreeFlags::kNoHooks>(data);
  }

 private:
  PartitionRoot alloc_{PartitionOptions{}};
};
113
// PartitionAlloc with the thread cache enabled (unless PartitionAlloc already
// is malloc(), in which case the process-wide thread cache is reused via
// |scope_|).
class PartitionAllocatorWithThreadCache : public Allocator {
 public:
  explicit PartitionAllocatorWithThreadCache(bool use_alternate_bucket_dist)
      : scope_(allocator_.root()) {
    // Start from a clean slate so cached entries from earlier tests don't
    // skew results.
    ThreadCacheRegistry::Instance().PurgeAll();
    // NOTE(review): the branches look inverted relative to the parameter name
    // (|use_alternate_bucket_dist| == false selects the denser distribution,
    // true resets to the default) — confirm this matches the intended meaning
    // of "alternate".
    if (!use_alternate_bucket_dist) {
      allocator_.root()->SwitchToDenserBucketDistribution();
    } else {
      allocator_.root()->ResetBucketDistributionForTesting();
    }
  }
  ~PartitionAllocatorWithThreadCache() override = default;

  void* Alloc(size_t size) override {
    return allocator_.root()->AllocInline<AllocFlags::kNoHooks>(size);
  }
  void Free(void* data) override {
    // Even though it's easy to invoke the fast path with
    // alloc_.Free<kNoHooks>(), we chose to use the slower path, because it's
    // more common with PA-E.
    PartitionRoot::FreeInlineInUnknownRoot<
        partition_alloc::FreeFlags::kNoHooks>(data);
  }

 private:
  // Thread cache can only be enabled here when PartitionAlloc is not malloc();
  // otherwise the malloc() partition owns it.
  static constexpr partition_alloc::PartitionOptions kOpts = []() {
    partition_alloc::PartitionOptions opts;
#if !BUILDFLAG(USE_PARTITION_ALLOC_AS_MALLOC)
    opts.thread_cache = PartitionOptions::kEnabled;
#endif
    return opts;
  }();
  PartitionAllocatorForTesting<internal::DisallowLeaks> allocator_{kOpts};
  internal::ThreadCacheProcessScopeForTesting scope_;
};
149
150 #if BUILDFLAG(ENABLE_ALLOCATION_STACK_TRACE_RECORDER)
// PartitionAlloc with the allocation-stack-trace recorder attached through the
// allocator dispatcher, to measure the recorder's overhead.
class PartitionAllocatorWithAllocationStackTraceRecorder : public Allocator {
 public:
  explicit PartitionAllocatorWithAllocationStackTraceRecorder(
      bool register_hooks)
      : register_hooks_(register_hooks) {
    if (register_hooks_) {
      dispatcher_.InitializeForTesting(&recorder_);
    }
  }

  ~PartitionAllocatorWithAllocationStackTraceRecorder() override {
    if (register_hooks_) {
      dispatcher_.ResetForTesting();
    }
  }

  // No kNoHooks here: allocations must go through the hooks so the recorder
  // observes them.
  void* Alloc(size_t size) override { return alloc_.AllocInline(size); }

  void Free(void* data) override {
    // Even though it's easy to invoke the fast path with
    // alloc_.Free<kNoHooks>(), we chose to use the slower path, because it's
    // more common with PA-E.
    // NOTE(review): unlike Alloc() above, this frees with kNoHooks, so the
    // recorder does not observe frees — confirm that asymmetry is intended.
    PartitionRoot::FreeInlineInUnknownRoot<
        partition_alloc::FreeFlags::kNoHooks>(data);
  }

 private:
  bool const register_hooks_;
  PartitionRoot alloc_{PartitionOptions{}};
  ::base::allocator::dispatcher::Dispatcher& dispatcher_ =
      ::base::allocator::dispatcher::Dispatcher::GetInstance();
  ::base::debug::tracer::AllocationTraceRecorder recorder_;
};
184 #endif // BUILDFLAG(ENABLE_ALLOCATION_STACK_TRACE_RECORDER)
185
186 class TestLoopThread : public base::PlatformThreadForTesting::Delegate {
187 public:
TestLoopThread(float (* test_fn)(Allocator *),Allocator * allocator)188 TestLoopThread(float (*test_fn)(Allocator*), Allocator* allocator)
189 : test_fn_(test_fn), allocator_(allocator) {
190 PA_CHECK(base::PlatformThreadForTesting::Create(0, this, &thread_handle_));
191 }
192
Run()193 float Run() {
194 base::PlatformThreadForTesting::Join(thread_handle_);
195 return laps_per_second_;
196 }
197
ThreadMain()198 void ThreadMain() override { laps_per_second_ = test_fn_(allocator_); }
199
200 float (*test_fn_)(Allocator*) = nullptr;
201 Allocator* allocator_ = nullptr;
202 base::PlatformThreadHandle thread_handle_;
203 std::atomic<float> laps_per_second_;
204 };
205
DisplayResults(const std::string & story_name,float iterations_per_second)206 void DisplayResults(const std::string& story_name,
207 float iterations_per_second) {
208 auto reporter = SetUpReporter(story_name);
209 reporter.AddResult(kMetricThroughput, iterations_per_second);
210 reporter.AddResult(kMetricTimePerAllocation,
211 static_cast<size_t>(1e9 / iterations_per_second));
212 }
213
214 class MemoryAllocationPerfNode {
215 public:
GetNext() const216 MemoryAllocationPerfNode* GetNext() const { return next_; }
SetNext(MemoryAllocationPerfNode * p)217 void SetNext(MemoryAllocationPerfNode* p) { next_ = p; }
FreeAll(MemoryAllocationPerfNode * first,Allocator * alloc)218 static void FreeAll(MemoryAllocationPerfNode* first, Allocator* alloc) {
219 MemoryAllocationPerfNode* cur = first;
220 while (cur != nullptr) {
221 MemoryAllocationPerfNode* next = cur->GetNext();
222 alloc->Free(cur);
223 cur = next;
224 }
225 }
226
227 private:
228 MemoryAllocationPerfNode* next_ = nullptr;
229 };
230
231 #if !defined(MEMORY_CONSTRAINED)
SingleBucket(Allocator * allocator)232 float SingleBucket(Allocator* allocator) {
233 auto* first =
234 reinterpret_cast<MemoryAllocationPerfNode*>(allocator->Alloc(kAllocSize));
235 size_t allocated_memory = kAllocSize;
236
237 ::base::LapTimer timer(kWarmupRuns, kTimeLimit, kTimeCheckInterval);
238 MemoryAllocationPerfNode* cur = first;
239 do {
240 auto* next = reinterpret_cast<MemoryAllocationPerfNode*>(
241 allocator->Alloc(kAllocSize));
242 PA_CHECK(next != nullptr);
243 cur->SetNext(next);
244 cur = next;
245 timer.NextLap();
246 allocated_memory += kAllocSize;
247 // With multiple threads, can get OOM otherwise.
248 if (allocated_memory > 200e6) {
249 cur->SetNext(nullptr);
250 MemoryAllocationPerfNode::FreeAll(first->GetNext(), allocator);
251 cur = first;
252 allocated_memory = kAllocSize;
253 }
254 } while (!timer.HasTimeLimitExpired());
255
256 // next_ = nullptr only works if the class constructor is called (it's not
257 // called in this case because then we can allocate arbitrary-length
258 // payloads.)
259 cur->SetNext(nullptr);
260 MemoryAllocationPerfNode::FreeAll(first, allocator);
261
262 return timer.LapsPerSecond();
263 }
#endif  // !defined(MEMORY_CONSTRAINED)
265
SingleBucketWithFree(Allocator * allocator)266 float SingleBucketWithFree(Allocator* allocator) {
267 // Allocate an initial element to make sure the bucket stays set up.
268 void* elem = allocator->Alloc(kAllocSize);
269
270 ::base::LapTimer timer(kWarmupRuns, kTimeLimit, kTimeCheckInterval);
271 do {
272 void* cur = allocator->Alloc(kAllocSize);
273 PA_CHECK(cur != nullptr);
274 allocator->Free(cur);
275 timer.NextLap();
276 } while (!timer.HasTimeLimitExpired());
277
278 allocator->Free(elem);
279 return timer.LapsPerSecond();
280 }
281
282 #if !defined(MEMORY_CONSTRAINED)
MultiBucket(Allocator * allocator)283 float MultiBucket(Allocator* allocator) {
284 auto* first =
285 reinterpret_cast<MemoryAllocationPerfNode*>(allocator->Alloc(kAllocSize));
286 MemoryAllocationPerfNode* cur = first;
287 size_t allocated_memory = kAllocSize;
288
289 ::base::LapTimer timer(kWarmupRuns, kTimeLimit, kTimeCheckInterval);
290 do {
291 for (int i = 0; i < kMultiBucketRounds; i++) {
292 size_t size = kMultiBucketMinimumSize + (i * kMultiBucketIncrement);
293 auto* next =
294 reinterpret_cast<MemoryAllocationPerfNode*>(allocator->Alloc(size));
295 PA_CHECK(next != nullptr);
296 cur->SetNext(next);
297 cur = next;
298 allocated_memory += size;
299 }
300
301 // Can OOM with multiple threads.
302 if (allocated_memory > 100e6) {
303 cur->SetNext(nullptr);
304 MemoryAllocationPerfNode::FreeAll(first->GetNext(), allocator);
305 cur = first;
306 allocated_memory = kAllocSize;
307 }
308
309 timer.NextLap();
310 } while (!timer.HasTimeLimitExpired());
311
312 cur->SetNext(nullptr);
313 MemoryAllocationPerfNode::FreeAll(first, allocator);
314
315 return timer.LapsPerSecond() * kMultiBucketRounds;
316 }
#endif  // !defined(MEMORY_CONSTRAINED)
318
MultiBucketWithFree(Allocator * allocator)319 float MultiBucketWithFree(Allocator* allocator) {
320 std::vector<void*> elems;
321 elems.reserve(kMultiBucketRounds);
322 // Do an initial round of allocation to make sure that the buckets stay in
323 // use (and aren't accidentally released back to the OS).
324 for (int i = 0; i < kMultiBucketRounds; i++) {
325 void* cur =
326 allocator->Alloc(kMultiBucketMinimumSize + (i * kMultiBucketIncrement));
327 PA_CHECK(cur != nullptr);
328 elems.push_back(cur);
329 }
330
331 ::base::LapTimer timer(kWarmupRuns, kTimeLimit, kTimeCheckInterval);
332 do {
333 for (int i = 0; i < kMultiBucketRounds; i++) {
334 void* cur = allocator->Alloc(kMultiBucketMinimumSize +
335 (i * kMultiBucketIncrement));
336 PA_CHECK(cur != nullptr);
337 allocator->Free(cur);
338 }
339 timer.NextLap();
340 } while (!timer.HasTimeLimitExpired());
341
342 for (void* ptr : elems) {
343 allocator->Free(ptr);
344 }
345
346 return timer.LapsPerSecond() * kMultiBucketRounds;
347 }
348
DirectMapped(Allocator * allocator)349 float DirectMapped(Allocator* allocator) {
350 constexpr size_t kSize = 2 * 1000 * 1000;
351
352 ::base::LapTimer timer(kWarmupRuns, kTimeLimit, kTimeCheckInterval);
353 do {
354 void* cur = allocator->Alloc(kSize);
355 PA_CHECK(cur != nullptr);
356 allocator->Free(cur);
357 timer.NextLap();
358 } while (!timer.HasTimeLimitExpired());
359
360 return timer.LapsPerSecond();
361 }
362
CreateAllocator(AllocatorType type,bool use_alternate_bucket_dist)363 std::unique_ptr<Allocator> CreateAllocator(AllocatorType type,
364 bool use_alternate_bucket_dist) {
365 switch (type) {
366 case AllocatorType::kSystem:
367 return std::make_unique<SystemAllocator>();
368 case AllocatorType::kPartitionAlloc:
369 return std::make_unique<PartitionAllocator>();
370 case AllocatorType::kPartitionAllocWithThreadCache:
371 return std::make_unique<PartitionAllocatorWithThreadCache>(
372 use_alternate_bucket_dist);
373 #if BUILDFLAG(ENABLE_ALLOCATION_STACK_TRACE_RECORDER)
374 case AllocatorType::kPartitionAllocWithAllocationStackTraceRecorder:
375 return std::make_unique<
376 PartitionAllocatorWithAllocationStackTraceRecorder>(true);
377 #endif
378 }
379 }
380
// Emits a machine-parsable CSV line ("RESULTSCSV: threads,type,total,min") so
// results can also be scraped from the test log. |alloc_type| is logged as its
// integer enum value.
void LogResults(int thread_count,
                AllocatorType alloc_type,
                uint64_t total_laps_per_second,
                uint64_t min_laps_per_second) {
  PA_LOG(INFO) << "RESULTSCSV: " << thread_count << ","
               << static_cast<int>(alloc_type) << "," << total_laps_per_second
               << "," << min_laps_per_second;
}
389
// Runs |test_fn| concurrently on |thread_count| threads, all sharing one
// allocator of type |alloc_type|, then reports aggregate and worst-thread
// throughput under |story_base_name|. If |noisy_neighbor_fn| is non-null it
// runs on one extra, unmeasured thread for the duration of the test (its own
// result is discarded).
void RunTest(int thread_count,
             bool use_alternate_bucket_dist,
             AllocatorType alloc_type,
             float (*test_fn)(Allocator*),
             float (*noisy_neighbor_fn)(Allocator*),
             const char* story_base_name) {
  auto alloc = CreateAllocator(alloc_type, use_alternate_bucket_dist);

  std::unique_ptr<TestLoopThread> noisy_neighbor_thread = nullptr;
  if (noisy_neighbor_fn) {
    noisy_neighbor_thread =
        std::make_unique<TestLoopThread>(noisy_neighbor_fn, alloc.get());
  }

  // Constructing a TestLoopThread starts its thread, so all workers run
  // concurrently from here.
  std::vector<std::unique_ptr<TestLoopThread>> threads;
  for (int i = 0; i < thread_count; ++i) {
    threads.push_back(std::make_unique<TestLoopThread>(test_fn, alloc.get()));
  }

  uint64_t total_laps_per_second = 0;
  uint64_t min_laps_per_second = std::numeric_limits<uint64_t>::max();
  for (int i = 0; i < thread_count; ++i) {
    // Run() joins the thread. The float result is truncated to a whole
    // number of laps.
    uint64_t laps_per_second = threads[i]->Run();
    min_laps_per_second = std::min(laps_per_second, min_laps_per_second);
    total_laps_per_second += laps_per_second;
  }

  // Join the noisy neighbor too; its throughput is intentionally ignored.
  if (noisy_neighbor_thread) {
    noisy_neighbor_thread->Run();
  }

  char const* alloc_type_str;
  switch (alloc_type) {
    case AllocatorType::kSystem:
      alloc_type_str = "System";
      break;
    case AllocatorType::kPartitionAlloc:
      alloc_type_str = "PartitionAlloc";
      break;
    case AllocatorType::kPartitionAllocWithThreadCache:
      alloc_type_str = "PartitionAllocWithThreadCache";
      break;
#if BUILDFLAG(ENABLE_ALLOCATION_STACK_TRACE_RECORDER)
    case AllocatorType::kPartitionAllocWithAllocationStackTraceRecorder:
      alloc_type_str = "PartitionAllocWithAllocationStackTraceRecorder";
      break;
#endif
  }

  std::string name = base::TruncatingStringPrintf(
      "%s%s_%s_%d", kMetricPrefixMemoryAllocation, story_base_name,
      alloc_type_str, thread_count);

  DisplayResults(name + "_total", total_laps_per_second);
  DisplayResults(name + "_worst", min_laps_per_second);
  LogResults(thread_count, alloc_type, total_laps_per_second,
             min_laps_per_second);
}
448
// Parameterized over (thread count, use_alternate_bucket_dist, allocator
// type); see INSTANTIATE_TEST_SUITE_P below for the value sets.
class PartitionAllocMemoryAllocationPerfTest
    : public testing::TestWithParam<std::tuple<int, bool, AllocatorType>> {};
451
// Only one partition with a thread cache: cannot use the thread cache when
// PartitionAlloc is malloc().
// Cartesian product: 1-4 threads x both bucket distributions x every
// allocator type compiled into this build.
INSTANTIATE_TEST_SUITE_P(
    ,
    PartitionAllocMemoryAllocationPerfTest,
    ::testing::Combine(
        ::testing::Values(1, 2, 3, 4),
        ::testing::Values(false, true),
        ::testing::Values(
            AllocatorType::kSystem,
            AllocatorType::kPartitionAlloc,
            AllocatorType::kPartitionAllocWithThreadCache
#if BUILDFLAG(ENABLE_ALLOCATION_STACK_TRACE_RECORDER)
            ,
            AllocatorType::kPartitionAllocWithAllocationStackTraceRecorder
#endif
            )));
469
470 // This test (and the other one below) allocates a large amount of memory, which
471 // can cause issues on Android.
472 #if !defined(MEMORY_CONSTRAINED)
// Single-size, allocation-only workload (see SingleBucket above).
TEST_P(PartitionAllocMemoryAllocationPerfTest, SingleBucket) {
  const auto& [thread_count, use_alternate_bucket_dist, alloc_type] =
      GetParam();
  RunTest(thread_count, use_alternate_bucket_dist, alloc_type, SingleBucket,
          nullptr, "SingleBucket");
}
#endif  // !defined(MEMORY_CONSTRAINED)
480
// Single-size alloc/free pairs (see SingleBucketWithFree above).
TEST_P(PartitionAllocMemoryAllocationPerfTest, SingleBucketWithFree) {
  const auto& [thread_count, use_alternate_bucket_dist, alloc_type] =
      GetParam();
  RunTest(thread_count, use_alternate_bucket_dist, alloc_type,
          SingleBucketWithFree, nullptr, "SingleBucketWithFree");
}
487
488 #if !defined(MEMORY_CONSTRAINED)
// Multi-size, allocation-only workload (see MultiBucket above).
TEST_P(PartitionAllocMemoryAllocationPerfTest, MultiBucket) {
  const auto& [thread_count, use_alternate_bucket_dist, alloc_type] =
      GetParam();
  RunTest(thread_count, use_alternate_bucket_dist, alloc_type, MultiBucket,
          nullptr, "MultiBucket");
}
#endif  // !defined(MEMORY_CONSTRAINED)
495
// Multi-size alloc/free pairs (see MultiBucketWithFree above).
TEST_P(PartitionAllocMemoryAllocationPerfTest, MultiBucketWithFree) {
  const auto& [thread_count, use_alternate_bucket_dist, alloc_type] =
      GetParam();
  RunTest(thread_count, use_alternate_bucket_dist, alloc_type,
          MultiBucketWithFree, nullptr, "MultiBucketWithFree");
}
502
// Large (direct-mapped) alloc/free pairs (see DirectMapped above).
TEST_P(PartitionAllocMemoryAllocationPerfTest, DirectMapped) {
  const auto& [thread_count, use_alternate_bucket_dist, alloc_type] =
      GetParam();
  RunTest(thread_count, use_alternate_bucket_dist, alloc_type, DirectMapped,
          nullptr, "DirectMapped");
}
509
510 #if !defined(MEMORY_CONSTRAINED)
// MultiBucket measured while a DirectMapped "noisy neighbor" thread runs
// alongside. Disabled by default; enable manually when investigating
// contention.
TEST_P(PartitionAllocMemoryAllocationPerfTest,
       DISABLED_MultiBucketWithNoisyNeighbor) {
  const auto& [thread_count, use_alternate_bucket_dist, alloc_type] =
      GetParam();
  RunTest(thread_count, use_alternate_bucket_dist, alloc_type, MultiBucket,
          DirectMapped, "MultiBucketWithNoisyNeighbor");
}
518 #endif // !defined(MEMORY_CONSTRAINED)
519
520 } // namespace
521
522 } // namespace partition_alloc::internal
523