1 // Copyright 2014 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <stddef.h>
6
7 #include <memory>
8 #include <vector>
9
10 #include "base/base_switches.h"
11 #include "base/command_line.h"
12 #include "base/functional/bind.h"
13 #include "base/location.h"
14 #include "base/memory/ptr_util.h"
15 #include "base/synchronization/condition_variable.h"
16 #include "base/synchronization/lock.h"
17 #include "base/synchronization/waitable_event.h"
18 #include "base/task/current_thread.h"
19 #include "base/task/single_thread_task_runner.h"
20 #include "base/task/task_observer.h"
21 #include "base/threading/thread.h"
22 #include "base/time/time.h"
23 #include "build/build_config.h"
24 #include "testing/gtest/include/gtest/gtest.h"
25 #include "testing/perf/perf_result_reporter.h"
26
27 #if BUILDFLAG(IS_POSIX)
28 #include <pthread.h>
29 #endif
30
31 namespace base {
32
33 namespace {
34
// Number of hops requested from PingPong() in each measured run; the reported
// per-hop times are the totals divided by this value.
constexpr int kNumRuns = 100000;

// Metric prefix and metric names handed to perf_test::PerfResultReporter.
constexpr char kMetricPrefixThread[] = "Thread.";
constexpr char kMetricClockTimePerHop[] = "wall_time_per_hop";
constexpr char kMetricCpuTimePerHop[] = "cpu_time_per_hop";

// Story names are built as <base> + <suffix>, e.g. "task_4_threads".
constexpr char kStoryBaseTask[] = "task";
constexpr char kStoryBaseTaskWithObserver[] = "task_with_observer";
constexpr char kStoryBaseWaitableEvent[] = "waitable_event";
constexpr char kStoryBaseCondVar[] = "condition_variable";
constexpr char kStorySuffixOneThread[] = "_1_thread";
constexpr char kStorySuffixFourThreads[] = "_4_threads";
46
47 #if BUILDFLAG(IS_POSIX)
48 constexpr char kStoryBasePthreadCondVar[] = "pthread_condition_variable";
49 #endif // BUILDFLAG(IS_POSIX)
50
SetUpReporter(const std::string & story_name)51 perf_test::PerfResultReporter SetUpReporter(const std::string& story_name) {
52 perf_test::PerfResultReporter reporter(kMetricPrefixThread, story_name);
53 reporter.RegisterImportantMetric(kMetricClockTimePerHop, "us");
54 reporter.RegisterImportantMetric(kMetricCpuTimePerHop, "us");
55 return reporter;
56 }
57
58 // Base class for a threading perf-test. This sets up some threads for the
59 // test and measures the clock-time in addition to time spent on each thread.
60 class ThreadPerfTest : public testing::Test {
61 public:
ThreadPerfTest()62 ThreadPerfTest()
63 : done_(WaitableEvent::ResetPolicy::AUTOMATIC,
64 WaitableEvent::InitialState::NOT_SIGNALED) {}
65
66 // To be implemented by each test. Subclass must uses threads_ such that
67 // their cpu-time can be measured. Test must return from PingPong() _and_
68 // call FinishMeasurement from any thread to complete the test.
Init()69 virtual void Init() {
70 if (ThreadTicks::IsSupported())
71 ThreadTicks::WaitUntilInitialized();
72 }
73 virtual void PingPong(int hops) = 0;
Reset()74 virtual void Reset() {}
75
TimeOnThread(base::ThreadTicks * ticks,base::WaitableEvent * done)76 void TimeOnThread(base::ThreadTicks* ticks, base::WaitableEvent* done) {
77 *ticks = base::ThreadTicks::Now();
78 done->Signal();
79 }
80
ThreadNow(const base::Thread & thread)81 base::ThreadTicks ThreadNow(const base::Thread& thread) {
82 base::WaitableEvent done(WaitableEvent::ResetPolicy::AUTOMATIC,
83 WaitableEvent::InitialState::NOT_SIGNALED);
84 base::ThreadTicks ticks;
85 thread.task_runner()->PostTask(
86 FROM_HERE, base::BindOnce(&ThreadPerfTest::TimeOnThread,
87 base::Unretained(this), &ticks, &done));
88 done.Wait();
89 return ticks;
90 }
91
RunPingPongTest(const std::string & story_name,unsigned num_threads)92 void RunPingPongTest(const std::string& story_name, unsigned num_threads) {
93 // Create threads and collect starting cpu-time for each thread.
94 std::vector<base::ThreadTicks> thread_starts;
95 while (threads_.size() < num_threads) {
96 threads_.push_back(std::make_unique<base::Thread>("PingPonger"));
97 threads_.back()->Start();
98 if (base::ThreadTicks::IsSupported())
99 thread_starts.push_back(ThreadNow(*threads_.back()));
100 }
101
102 Init();
103
104 base::TimeTicks start = base::TimeTicks::Now();
105 PingPong(kNumRuns);
106 done_.Wait();
107 base::TimeTicks end = base::TimeTicks::Now();
108
109 // Gather the cpu-time spent on each thread. This does one extra tasks,
110 // but that should be in the noise given enough runs.
111 base::TimeDelta thread_time;
112 while (threads_.size()) {
113 if (base::ThreadTicks::IsSupported()) {
114 thread_time += ThreadNow(*threads_.back()) - thread_starts.back();
115 thread_starts.pop_back();
116 }
117 threads_.pop_back();
118 }
119
120 Reset();
121
122 double us_per_task_clock = (end - start).InMicrosecondsF() / kNumRuns;
123 double us_per_task_cpu = thread_time.InMicrosecondsF() / kNumRuns;
124
125 auto reporter = SetUpReporter(story_name);
126 // Clock time per task.
127 reporter.AddResult(kMetricClockTimePerHop, us_per_task_clock);
128
129 // Total utilization across threads if available (likely higher).
130 if (base::ThreadTicks::IsSupported()) {
131 reporter.AddResult(kMetricCpuTimePerHop, us_per_task_cpu);
132 }
133 }
134
135 protected:
FinishMeasurement()136 void FinishMeasurement() { done_.Signal(); }
137 std::vector<std::unique_ptr<base::Thread>> threads_;
138
139 private:
140 base::WaitableEvent done_;
141 };
142
143 // Class to test task performance by posting empty tasks back and forth.
144 class TaskPerfTest : public ThreadPerfTest {
NextThread(int count)145 base::Thread* NextThread(int count) {
146 return threads_[count % threads_.size()].get();
147 }
148
PingPong(int hops)149 void PingPong(int hops) override {
150 if (!hops) {
151 FinishMeasurement();
152 return;
153 }
154 NextThread(hops)->task_runner()->PostTask(
155 FROM_HERE, base::BindOnce(&ThreadPerfTest::PingPong,
156 base::Unretained(this), hops - 1));
157 }
158 };
159
160 // This tries to test the 'best-case' as well as the 'worst-case' task posting
161 // performance. The best-case keeps one thread alive such that it never yeilds,
162 // while the worse-case forces a context switch for every task. Four threads are
163 // used to ensure the threads do yeild (with just two it might be possible for
164 // both threads to stay awake if they can signal each other fast enough).
TEST_F(TaskPerfTest,TaskPingPong)165 TEST_F(TaskPerfTest, TaskPingPong) {
166 RunPingPongTest(std::string(kStoryBaseTask) + kStorySuffixOneThread, 1);
167 RunPingPongTest(std::string(kStoryBaseTask) + kStorySuffixFourThreads, 4);
168 }
169
170
171 // Same as above, but add observers to test their perf impact.
172 class MessageLoopObserver : public base::TaskObserver {
173 public:
WillProcessTask(const base::PendingTask & pending_task,bool was_blocked_or_low_priority)174 void WillProcessTask(const base::PendingTask& pending_task,
175 bool was_blocked_or_low_priority) override {}
DidProcessTask(const base::PendingTask & pending_task)176 void DidProcessTask(const base::PendingTask& pending_task) override {}
177 };
178 MessageLoopObserver message_loop_observer;
179
180 class TaskObserverPerfTest : public TaskPerfTest {
181 public:
Init()182 void Init() override {
183 TaskPerfTest::Init();
184 for (auto& i : threads_) {
185 i->task_runner()->PostTask(
186 FROM_HERE, BindOnce(
187 [](MessageLoopObserver* observer) {
188 CurrentThread::Get()->AddTaskObserver(observer);
189 },
190 Unretained(&message_loop_observer)));
191 }
192 }
193 };
194
// Runs the task ping-pong with an observer attached, on one and four threads.
TEST_F(TaskObserverPerfTest, TaskPingPong) {
  const std::string story_base(kStoryBaseTaskWithObserver);
  RunPingPongTest(story_base + kStorySuffixOneThread, 1);
  RunPingPongTest(story_base + kStorySuffixFourThreads, 4);
}
201
202 // Class to test our WaitableEvent performance by signaling back and fort.
203 // WaitableEvent is templated so we can also compare with other versions.
204 template <typename WaitableEventType>
205 class EventPerfTest : public ThreadPerfTest {
206 public:
Init()207 void Init() override {
208 for (size_t i = 0; i < threads_.size(); i++) {
209 events_.push_back(std::make_unique<WaitableEventType>(
210 WaitableEvent::ResetPolicy::AUTOMATIC,
211 WaitableEvent::InitialState::NOT_SIGNALED));
212 }
213 }
214
Reset()215 void Reset() override { events_.clear(); }
216
WaitAndSignalOnThread(size_t event)217 void WaitAndSignalOnThread(size_t event) {
218 size_t next_event = (event + 1) % events_.size();
219 int my_hops = 0;
220 do {
221 events_[event]->Wait();
222 my_hops = --remaining_hops_; // We own 'hops' between Wait and Signal.
223 events_[next_event]->Signal();
224 } while (my_hops > 0);
225 // Once we are done, all threads will signal as hops passes zero.
226 // We only signal completion once, on the thread that reaches zero.
227 if (!my_hops)
228 FinishMeasurement();
229 }
230
PingPong(int hops)231 void PingPong(int hops) override {
232 remaining_hops_ = hops;
233 for (size_t i = 0; i < threads_.size(); i++) {
234 threads_[i]->task_runner()->PostTask(
235 FROM_HERE, base::BindOnce(&EventPerfTest::WaitAndSignalOnThread,
236 base::Unretained(this), i));
237 }
238
239 // Kick off the Signal ping-ponging.
240 events_.front()->Signal();
241 }
242
243 int remaining_hops_;
244 std::vector<std::unique_ptr<WaitableEventType>> events_;
245 };
246
247 // Similar to the task posting test, this just tests similar functionality
248 // using WaitableEvents. We only test four threads (worst-case), but we
249 // might want to craft a way to test the best-case (where the thread doesn't
250 // end up blocking because the event is already signalled).
251 typedef EventPerfTest<base::WaitableEvent> WaitableEventThreadPerfTest;
TEST_F(WaitableEventThreadPerfTest,EventPingPong)252 TEST_F(WaitableEventThreadPerfTest, EventPingPong) {
253 RunPingPongTest(
254 std::string(kStoryBaseWaitableEvent) + kStorySuffixFourThreads, 4);
255 }
256
257 // Build a minimal event using ConditionVariable.
258 class ConditionVariableEvent {
259 public:
ConditionVariableEvent(WaitableEvent::ResetPolicy reset_policy,WaitableEvent::InitialState initial_state)260 ConditionVariableEvent(WaitableEvent::ResetPolicy reset_policy,
261 WaitableEvent::InitialState initial_state)
262 : cond_(&lock_), signaled_(false) {
263 DCHECK_EQ(WaitableEvent::ResetPolicy::AUTOMATIC, reset_policy);
264 DCHECK_EQ(WaitableEvent::InitialState::NOT_SIGNALED, initial_state);
265 }
266
Signal()267 void Signal() {
268 {
269 base::AutoLock scoped_lock(lock_);
270 signaled_ = true;
271 }
272 cond_.Signal();
273 }
274
Wait()275 void Wait() {
276 base::AutoLock scoped_lock(lock_);
277 while (!signaled_)
278 cond_.Wait();
279 signaled_ = false;
280 }
281
282 private:
283 base::Lock lock_;
284 base::ConditionVariable cond_;
285 bool signaled_;
286 };
287
288 // This is meant to test the absolute minimal context switching time
289 // using our own base synchronization code.
290 typedef EventPerfTest<ConditionVariableEvent> ConditionVariablePerfTest;
TEST_F(ConditionVariablePerfTest,EventPingPong)291 TEST_F(ConditionVariablePerfTest, EventPingPong) {
292 RunPingPongTest(std::string(kStoryBaseCondVar) + kStorySuffixFourThreads, 4);
293 }
294 #if BUILDFLAG(IS_POSIX)
295
296 // Absolutely 100% minimal posix waitable event. If there is a better/faster
297 // way to force a context switch, we should use that instead.
298 class PthreadEvent {
299 public:
PthreadEvent(WaitableEvent::ResetPolicy reset_policy,WaitableEvent::InitialState initial_state)300 PthreadEvent(WaitableEvent::ResetPolicy reset_policy,
301 WaitableEvent::InitialState initial_state) {
302 DCHECK_EQ(WaitableEvent::ResetPolicy::AUTOMATIC, reset_policy);
303 DCHECK_EQ(WaitableEvent::InitialState::NOT_SIGNALED, initial_state);
304 pthread_mutex_init(&mutex_, nullptr);
305 pthread_cond_init(&cond_, nullptr);
306 signaled_ = false;
307 }
308
~PthreadEvent()309 ~PthreadEvent() {
310 pthread_cond_destroy(&cond_);
311 pthread_mutex_destroy(&mutex_);
312 }
313
Signal()314 void Signal() {
315 pthread_mutex_lock(&mutex_);
316 signaled_ = true;
317 pthread_mutex_unlock(&mutex_);
318 pthread_cond_signal(&cond_);
319 }
320
Wait()321 void Wait() {
322 pthread_mutex_lock(&mutex_);
323 while (!signaled_)
324 pthread_cond_wait(&cond_, &mutex_);
325 signaled_ = false;
326 pthread_mutex_unlock(&mutex_);
327 }
328
329 private:
330 bool signaled_;
331 pthread_mutex_t mutex_;
332 pthread_cond_t cond_;
333 };
334
335 // This is meant to test the absolute minimal context switching time.
336 // If there is any faster way to do this we should substitute it in.
337 typedef EventPerfTest<PthreadEvent> PthreadEventPerfTest;
TEST_F(PthreadEventPerfTest,EventPingPong)338 TEST_F(PthreadEventPerfTest, EventPingPong) {
339 RunPingPongTest(
340 std::string(kStoryBasePthreadCondVar) + kStorySuffixFourThreads, 4);
341 }
342
343 #endif
344
345 } // namespace
346
347 } // namespace base
348