// Copyright 2019 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <algorithm>
#include <atomic>
#include <cstdint>
#include <cstdlib>
#include <limits>
#include <memory>
#include <string>
#include <tuple>
#include <vector>

#include "base/debug/debugging_buildflags.h"
#include "base/timer/lap_timer.h"
#include "build/build_config.h"
#include "partition_alloc/extended_api.h"
#include "partition_alloc/partition_alloc.h"
#include "partition_alloc/partition_alloc_base/logging.h"
#include "partition_alloc/partition_alloc_base/strings/stringprintf.h"
#include "partition_alloc/partition_alloc_base/threading/platform_thread_for_testing.h"
#include "partition_alloc/partition_alloc_base/time/time.h"
#include "partition_alloc/partition_alloc_check.h"
#include "partition_alloc/partition_alloc_constants.h"
#include "partition_alloc/partition_alloc_for_testing.h"
#include "partition_alloc/partition_root.h"
#include "partition_alloc/thread_cache.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "testing/perf/perf_result_reporter.h"

#if BUILDFLAG(IS_ANDROID) || defined(ARCH_CPU_32_BITS) || BUILDFLAG(IS_FUCHSIA)
// Some tests allocate many GB of memory, which can cause issues on Android and
// address-space exhaustion for any 32-bit process.
#define MEMORY_CONSTRAINED
#endif

#if BUILDFLAG(ENABLE_ALLOCATION_STACK_TRACE_RECORDER)
#include "base/allocator/dispatcher/dispatcher.h"
#include "base/debug/allocation_trace.h"
#endif

namespace partition_alloc::internal {

namespace {

// Change kTimeLimit to something higher if you need more time to capture a
// trace.
constexpr ::base::TimeDelta kTimeLimit = ::base::Seconds(2);
constexpr int kWarmupRuns = 10000;
constexpr int kTimeCheckInterval = 100000;
constexpr size_t kAllocSize = 40;

// Size constants are mostly arbitrary, but try to simulate something like CSS
// parsing which consists of lots of relatively small objects.
constexpr int kMultiBucketMinimumSize = 24;
constexpr int kMultiBucketIncrement = 13;
// The largest size allocated is 24 + (13 * 21) = 297 bytes.
constexpr int kMultiBucketRounds = 22;

constexpr char kMetricPrefixMemoryAllocation[] = "MemoryAllocation.";
constexpr char kMetricThroughput[] = "throughput";
constexpr char kMetricTimePerAllocation[] = "time_per_allocation";

perf_test::PerfResultReporter SetUpReporter(const std::string& story_name) {
  perf_test::PerfResultReporter reporter(kMetricPrefixMemoryAllocation,
                                         story_name);
  reporter.RegisterImportantMetric(kMetricThroughput, "runs/s");
  reporter.RegisterImportantMetric(kMetricTimePerAllocation, "ns");
  return reporter;
}

enum class AllocatorType {
  kSystem,
  kPartitionAlloc,
  kPartitionAllocWithThreadCache,
#if BUILDFLAG(ENABLE_ALLOCATION_STACK_TRACE_RECORDER)
  kPartitionAllocWithAllocationStackTraceRecorder,
#endif
};

class Allocator {
 public:
  Allocator() = default;
  virtual ~Allocator() = default;
  virtual void* Alloc(size_t size) = 0;
  virtual void Free(void* data) = 0;
};

class SystemAllocator : public Allocator {
 public:
  SystemAllocator() = default;
  ~SystemAllocator() override = default;
  void* Alloc(size_t size) override { return malloc(size); }
  void Free(void* data) override { free(data); }
};

class PartitionAllocator : public Allocator {
 public:
  PartitionAllocator() = default;
  ~PartitionAllocator() override { alloc_.DestructForTesting(); }

  void* Alloc(size_t size) override {
    return alloc_.AllocInline<AllocFlags::kNoHooks>(size);
  }
  void Free(void* data) override {
    // Even though it's easy to invoke the fast path with
    // alloc_.Free<kNoHooks>(), we chose to use the slower path, because it's
    // more common with PA-E.
    PartitionRoot::FreeInlineInUnknownRoot<
        partition_alloc::FreeFlags::kNoHooks>(data);
  }

 private:
  PartitionRoot alloc_{PartitionOptions{}};
};

class PartitionAllocatorWithThreadCache : public Allocator {
 public:
  explicit PartitionAllocatorWithThreadCache(bool use_alternate_bucket_dist)
      : scope_(allocator_.root()) {
    ThreadCacheRegistry::Instance().PurgeAll();
    if (!use_alternate_bucket_dist) {
      allocator_.root()->SwitchToDenserBucketDistribution();
    } else {
      allocator_.root()->ResetBucketDistributionForTesting();
    }
  }
  ~PartitionAllocatorWithThreadCache() override = default;

  void* Alloc(size_t size) override {
    return allocator_.root()->AllocInline<AllocFlags::kNoHooks>(size);
  }
  void Free(void* data) override {
    // Even though it's easy to invoke the fast path with
    // alloc_.Free<kNoHooks>(), we chose to use the slower path, because it's
    // more common with PA-E.
    PartitionRoot::FreeInlineInUnknownRoot<
        partition_alloc::FreeFlags::kNoHooks>(data);
  }

 private:
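  // Only one root can own the thread cache; when PartitionAlloc is used as
  // malloc(), the malloc() partition already has it, so don't enable it here
  // (see the comment above INSTANTIATE_TEST_SUITE_P below).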
  static constexpr partition_alloc::PartitionOptions kOpts = []() {
    partition_alloc::PartitionOptions opts;
#if !BUILDFLAG(USE_PARTITION_ALLOC_AS_MALLOC)
    opts.thread_cache = PartitionOptions::kEnabled;
#endif
    return opts;
  }();
  PartitionAllocatorForTesting<internal::DisallowLeaks> allocator_{kOpts};
  internal::ThreadCacheProcessScopeForTesting scope_;
};

#if BUILDFLAG(ENABLE_ALLOCATION_STACK_TRACE_RECORDER)
class PartitionAllocatorWithAllocationStackTraceRecorder : public Allocator {
 public:
  explicit PartitionAllocatorWithAllocationStackTraceRecorder(
      bool register_hooks)
      : register_hooks_(register_hooks) {
    if (register_hooks_) {
      dispatcher_.InitializeForTesting(&recorder_);
    }
  }

  ~PartitionAllocatorWithAllocationStackTraceRecorder() override {
    if (register_hooks_) {
      dispatcher_.ResetForTesting();
    }
  }

  void* Alloc(size_t size) override { return alloc_.AllocInline(size); }

  void Free(void* data) override {
    // Even though it's easy to invoke the fast path with
    // alloc_.Free<kNoHooks>(), we chose to use the slower path, because it's
    // more common with PA-E.
    PartitionRoot::FreeInlineInUnknownRoot<
        partition_alloc::FreeFlags::kNoHooks>(data);
  }

 private:
  bool const register_hooks_;
  PartitionRoot alloc_{PartitionOptions{}};
  ::base::allocator::dispatcher::Dispatcher& dispatcher_ =
      ::base::allocator::dispatcher::Dispatcher::GetInstance();
  ::base::debug::tracer::AllocationTraceRecorder recorder_;
};
#endif  // BUILDFLAG(ENABLE_ALLOCATION_STACK_TRACE_RECORDER)

class TestLoopThread : public base::PlatformThreadForTesting::Delegate {
 public:
  TestLoopThread(float (*test_fn)(Allocator*), Allocator* allocator)
      : test_fn_(test_fn), allocator_(allocator) {
    PA_CHECK(base::PlatformThreadForTesting::Create(0, this, &thread_handle_));
  }

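  // Joins the thread (which started running |test_fn_| at construction time)
  // and returns the measured laps per second.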
  float Run() {
    base::PlatformThreadForTesting::Join(thread_handle_);
    return laps_per_second_;
  }

  void ThreadMain() override { laps_per_second_ = test_fn_(allocator_); }

  float (*test_fn_)(Allocator*) = nullptr;
  Allocator* allocator_ = nullptr;
  base::PlatformThreadHandle thread_handle_;
  std::atomic<float> laps_per_second_ = 0.;
};

void DisplayResults(const std::string& story_name,
                    float iterations_per_second) {
  auto reporter = SetUpReporter(story_name);
  reporter.AddResult(kMetricThroughput, iterations_per_second);
  reporter.AddResult(kMetricTimePerAllocation,
                     static_cast<size_t>(1e9 / iterations_per_second));
}

class MemoryAllocationPerfNode {
 public:
  MemoryAllocationPerfNode* GetNext() const { return next_; }
  void SetNext(MemoryAllocationPerfNode* p) { next_ = p; }
  static void FreeAll(MemoryAllocationPerfNode* first, Allocator* alloc) {
    MemoryAllocationPerfNode* cur = first;
    while (cur != nullptr) {
      MemoryAllocationPerfNode* next = cur->GetNext();
      alloc->Free(cur);
      cur = next;
    }
  }

 private:
  MemoryAllocationPerfNode* next_ = nullptr;
};

#if !defined(MEMORY_CONSTRAINED)
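// Allocates objects of a single size. Each allocation doubles as an intrusive
// list node chained onto the previous one, so every live allocation can be
// freed later without any separate bookkeeping.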
float SingleBucket(Allocator* allocator) {
  auto* first =
      reinterpret_cast<MemoryAllocationPerfNode*>(allocator->Alloc(kAllocSize));
  size_t allocated_memory = kAllocSize;

  ::base::LapTimer timer(kWarmupRuns, kTimeLimit, kTimeCheckInterval);
  MemoryAllocationPerfNode* cur = first;
  do {
    auto* next = reinterpret_cast<MemoryAllocationPerfNode*>(
        allocator->Alloc(kAllocSize));
    PA_CHECK(next != nullptr);
    cur->SetNext(next);
    cur = next;
    timer.NextLap();
    allocated_memory += kAllocSize;
    // Cap the amount of live memory; with multiple threads we could otherwise
    // OOM.
    if (allocated_memory > 200e6) {
      cur->SetNext(nullptr);
      MemoryAllocationPerfNode::FreeAll(first->GetNext(), allocator);
      cur = first;
      allocated_memory = kAllocSize;
    }
  } while (!timer.HasTimeLimitExpired());

  // The in-class initializer next_ = nullptr only takes effect if the
  // constructor runs; it doesn't here, because nodes are created from raw
  // allocations (which lets us use arbitrary-length payloads). So terminate
  // the list explicitly.
  cur->SetNext(nullptr);
  MemoryAllocationPerfNode::FreeAll(first, allocator);

  return timer.LapsPerSecond();
}
#endif  // !defined(MEMORY_CONSTRAINED)

float SingleBucketWithFree(Allocator* allocator) {
  // Allocate an initial element to make sure the bucket stays set up.
  void* elem = allocator->Alloc(kAllocSize);

  ::base::LapTimer timer(kWarmupRuns, kTimeLimit, kTimeCheckInterval);
  do {
    void* cur = allocator->Alloc(kAllocSize);
    PA_CHECK(cur != nullptr);
    allocator->Free(cur);
    timer.NextLap();
  } while (!timer.HasTimeLimitExpired());

  allocator->Free(elem);
  return timer.LapsPerSecond();
}

#if !defined(MEMORY_CONSTRAINED)
float MultiBucket(Allocator* allocator) {
  auto* first =
      reinterpret_cast<MemoryAllocationPerfNode*>(allocator->Alloc(kAllocSize));
  MemoryAllocationPerfNode* cur = first;
  size_t allocated_memory = kAllocSize;

  ::base::LapTimer timer(kWarmupRuns, kTimeLimit, kTimeCheckInterval);
  do {
    for (int i = 0; i < kMultiBucketRounds; i++) {
      size_t size = kMultiBucketMinimumSize + (i * kMultiBucketIncrement);
      auto* next =
          reinterpret_cast<MemoryAllocationPerfNode*>(allocator->Alloc(size));
      PA_CHECK(next != nullptr);
      cur->SetNext(next);
      cur = next;
      allocated_memory += size;
    }

    // Can OOM with multiple threads.
    if (allocated_memory > 100e6) {
      cur->SetNext(nullptr);
      MemoryAllocationPerfNode::FreeAll(first->GetNext(), allocator);
      cur = first;
      allocated_memory = kAllocSize;
    }

    timer.NextLap();
  } while (!timer.HasTimeLimitExpired());

  cur->SetNext(nullptr);
  MemoryAllocationPerfNode::FreeAll(first, allocator);

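  // Each lap performs kMultiBucketRounds allocations, so scale the result to
  // report allocations per second.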
  return timer.LapsPerSecond() * kMultiBucketRounds;
}
#endif  // !defined(MEMORY_CONSTRAINED)

float MultiBucketWithFree(Allocator* allocator) {
  std::vector<void*> elems;
  elems.reserve(kMultiBucketRounds);
  // Do an initial round of allocation to make sure that the buckets stay in
  // use (and aren't accidentally released back to the OS).
  for (int i = 0; i < kMultiBucketRounds; i++) {
    void* cur =
        allocator->Alloc(kMultiBucketMinimumSize + (i * kMultiBucketIncrement));
    PA_CHECK(cur != nullptr);
    elems.push_back(cur);
  }

  ::base::LapTimer timer(kWarmupRuns, kTimeLimit, kTimeCheckInterval);
  do {
    for (int i = 0; i < kMultiBucketRounds; i++) {
      void* cur = allocator->Alloc(kMultiBucketMinimumSize +
                                   (i * kMultiBucketIncrement));
      PA_CHECK(cur != nullptr);
      allocator->Free(cur);
    }
    timer.NextLap();
  } while (!timer.HasTimeLimitExpired());

  for (void* ptr : elems) {
    allocator->Free(ptr);
  }

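  // As above, each lap is kMultiBucketRounds allocation/free pairs.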
  return timer.LapsPerSecond() * kMultiBucketRounds;
}

float DirectMapped(Allocator* allocator) {
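  // This size is above PartitionAlloc's maximum bucketed size, so each
  // allocation takes the direct-mapped path.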
  constexpr size_t kSize = 2 * 1000 * 1000;

  ::base::LapTimer timer(kWarmupRuns, kTimeLimit, kTimeCheckInterval);
  do {
    void* cur = allocator->Alloc(kSize);
    PA_CHECK(cur != nullptr);
    allocator->Free(cur);
    timer.NextLap();
  } while (!timer.HasTimeLimitExpired());

  return timer.LapsPerSecond();
}

std::unique_ptr<Allocator> CreateAllocator(AllocatorType type,
                                           bool use_alternate_bucket_dist) {
  switch (type) {
    case AllocatorType::kSystem:
      return std::make_unique<SystemAllocator>();
    case AllocatorType::kPartitionAlloc:
      return std::make_unique<PartitionAllocator>();
    case AllocatorType::kPartitionAllocWithThreadCache:
      return std::make_unique<PartitionAllocatorWithThreadCache>(
          use_alternate_bucket_dist);
#if BUILDFLAG(ENABLE_ALLOCATION_STACK_TRACE_RECORDER)
    case AllocatorType::kPartitionAllocWithAllocationStackTraceRecorder:
      return std::make_unique<
          PartitionAllocatorWithAllocationStackTraceRecorder>(true);
#endif
  }
}

void LogResults(int thread_count,
                AllocatorType alloc_type,
                uint64_t total_laps_per_second,
                uint64_t min_laps_per_second) {
  PA_LOG(INFO) << "RESULTSCSV: " << thread_count << ","
               << static_cast<int>(alloc_type) << "," << total_laps_per_second
               << "," << min_laps_per_second;
}

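// Runs |test_fn| on |thread_count| threads sharing a single allocator,
// optionally alongside one extra thread running |noisy_neighbor_fn|, then
// reports the aggregate and worst per-thread throughput.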
void RunTest(int thread_count,
             bool use_alternate_bucket_dist,
             AllocatorType alloc_type,
             float (*test_fn)(Allocator*),
             float (*noisy_neighbor_fn)(Allocator*),
             const char* story_base_name) {
  auto alloc = CreateAllocator(alloc_type, use_alternate_bucket_dist);

  std::unique_ptr<TestLoopThread> noisy_neighbor_thread = nullptr;
  if (noisy_neighbor_fn) {
    noisy_neighbor_thread =
        std::make_unique<TestLoopThread>(noisy_neighbor_fn, alloc.get());
  }

  std::vector<std::unique_ptr<TestLoopThread>> threads;
  for (int i = 0; i < thread_count; ++i) {
    threads.push_back(std::make_unique<TestLoopThread>(test_fn, alloc.get()));
  }

  uint64_t total_laps_per_second = 0;
  uint64_t min_laps_per_second = std::numeric_limits<uint64_t>::max();
  for (int i = 0; i < thread_count; ++i) {
    uint64_t laps_per_second = threads[i]->Run();
    min_laps_per_second = std::min(laps_per_second, min_laps_per_second);
    total_laps_per_second += laps_per_second;
  }

  if (noisy_neighbor_thread) {
    noisy_neighbor_thread->Run();
  }

  char const* alloc_type_str;
  switch (alloc_type) {
    case AllocatorType::kSystem:
      alloc_type_str = "System";
      break;
    case AllocatorType::kPartitionAlloc:
      alloc_type_str = "PartitionAlloc";
      break;
    case AllocatorType::kPartitionAllocWithThreadCache:
      alloc_type_str = "PartitionAllocWithThreadCache";
      break;
#if BUILDFLAG(ENABLE_ALLOCATION_STACK_TRACE_RECORDER)
    case AllocatorType::kPartitionAllocWithAllocationStackTraceRecorder:
      alloc_type_str = "PartitionAllocWithAllocationStackTraceRecorder";
      break;
#endif
  }

  std::string name = base::TruncatingStringPrintf(
      "%s%s_%s_%d", kMetricPrefixMemoryAllocation, story_base_name,
      alloc_type_str, thread_count);

  DisplayResults(name + "_total", total_laps_per_second);
  DisplayResults(name + "_worst", min_laps_per_second);
  LogResults(thread_count, alloc_type, total_laps_per_second,
             min_laps_per_second);
}

class PartitionAllocMemoryAllocationPerfTest
    : public testing::TestWithParam<std::tuple<int, bool, AllocatorType>> {};
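// Parameters: (thread_count, use_alternate_bucket_dist, AllocatorType).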

// Only one partition with a thread cache: cannot use the thread cache when
// PartitionAlloc is malloc().
INSTANTIATE_TEST_SUITE_P(
    ,
    PartitionAllocMemoryAllocationPerfTest,
    ::testing::Combine(
        ::testing::Values(1, 2, 3, 4),
        ::testing::Values(false, true),
        ::testing::Values(
            AllocatorType::kSystem,
            AllocatorType::kPartitionAlloc,
            AllocatorType::kPartitionAllocWithThreadCache
#if BUILDFLAG(ENABLE_ALLOCATION_STACK_TRACE_RECORDER)
            ,
            AllocatorType::kPartitionAllocWithAllocationStackTraceRecorder
#endif
            )));

// This test (and the other MEMORY_CONSTRAINED-guarded tests below) allocates a
// large amount of memory, which can cause issues on Android.
#if !defined(MEMORY_CONSTRAINED)
TEST_P(PartitionAllocMemoryAllocationPerfTest, SingleBucket) {
  auto params = GetParam();
  RunTest(std::get<int>(params), std::get<bool>(params),
          std::get<AllocatorType>(params), SingleBucket, nullptr,
          "SingleBucket");
}
#endif  // !defined(MEMORY_CONSTRAINED)

TEST_P(PartitionAllocMemoryAllocationPerfTest, SingleBucketWithFree) {
  auto params = GetParam();
  RunTest(std::get<int>(params), std::get<bool>(params),
          std::get<AllocatorType>(params), SingleBucketWithFree, nullptr,
          "SingleBucketWithFree");
}

#if !defined(MEMORY_CONSTRAINED)
TEST_P(PartitionAllocMemoryAllocationPerfTest, MultiBucket) {
  auto params = GetParam();
  RunTest(std::get<int>(params), std::get<bool>(params),
          std::get<AllocatorType>(params), MultiBucket, nullptr, "MultiBucket");
}
#endif  // !defined(MEMORY_CONSTRAINED)

TEST_P(PartitionAllocMemoryAllocationPerfTest, MultiBucketWithFree) {
  auto params = GetParam();
  RunTest(std::get<int>(params), std::get<bool>(params),
          std::get<AllocatorType>(params), MultiBucketWithFree, nullptr,
          "MultiBucketWithFree");
}

TEST_P(PartitionAllocMemoryAllocationPerfTest, DirectMapped) {
  auto params = GetParam();
  RunTest(std::get<int>(params), std::get<bool>(params),
          std::get<AllocatorType>(params), DirectMapped, nullptr,
          "DirectMapped");
}

#if !defined(MEMORY_CONSTRAINED)
TEST_P(PartitionAllocMemoryAllocationPerfTest,
       DISABLED_MultiBucketWithNoisyNeighbor) {
  auto params = GetParam();
  RunTest(std::get<int>(params), std::get<bool>(params),
          std::get<AllocatorType>(params), MultiBucket, DirectMapped,
          "MultiBucketWithNoisyNeighbor");
}
#endif  // !defined(MEMORY_CONSTRAINED)

}  // namespace

}  // namespace partition_alloc::internal