1 // Copyright (C) 2018 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <random>
16
17 #include <benchmark/benchmark.h>
18
19 #include "perfetto/base/time.h"
20 #include "perfetto/ext/traced/traced.h"
21 #include "perfetto/ext/tracing/core/trace_packet.h"
22 #include "perfetto/tracing/core/trace_config.h"
23 #include "src/base/test/test_task_runner.h"
24 #include "test/gtest_and_gmock.h"
25 #include "test/test_helper.h"
26
27 #include "protos/perfetto/config/test_config.gen.h"
28 #include "protos/perfetto/trace/test_event.gen.h"
29 #include "protos/perfetto/trace/trace_packet.pbzero.h"
30
31 namespace perfetto {
32
33 namespace {
34
// Returns true when the BENCHMARK_FUNCTIONAL_TEST_ONLY environment variable is
// present (with any value, including an empty one). The argument generators
// and BenchmarkConsumer in this file consult it to pick smaller parameters.
bool IsBenchmarkFunctionalOnly() {
  const char* const functional_only = getenv("BENCHMARK_FUNCTIONAL_TEST_ONLY");
  return functional_only != nullptr;
}
38
BenchmarkProducer(benchmark::State & state)39 void BenchmarkProducer(benchmark::State& state) {
40 base::TestTaskRunner task_runner;
41
42 TestHelper helper(&task_runner);
43 helper.StartServiceIfRequired();
44
45 FakeProducer* producer = helper.ConnectFakeProducer();
46 helper.ConnectConsumer();
47 helper.WaitForConsumerConnect();
48
49 TraceConfig trace_config;
50 trace_config.add_buffers()->set_size_kb(512);
51
52 auto* ds_config = trace_config.add_data_sources()->mutable_config();
53 ds_config->set_name("android.perfetto.FakeProducer");
54 ds_config->set_target_buffer(0);
55
56 static constexpr uint32_t kRandomSeed = 42;
57 uint32_t message_count = static_cast<uint32_t>(state.range(0));
58 uint32_t message_bytes = static_cast<uint32_t>(state.range(1));
59 uint32_t mb_per_s = static_cast<uint32_t>(state.range(2));
60
61 uint32_t messages_per_s = mb_per_s * 1024 * 1024 / message_bytes;
62 uint32_t time_for_messages_ms =
63 10000 + (messages_per_s == 0 ? 0 : message_count * 1000 / messages_per_s);
64
65 ds_config->mutable_for_testing()->set_seed(kRandomSeed);
66 ds_config->mutable_for_testing()->set_message_count(message_count);
67 ds_config->mutable_for_testing()->set_message_size(message_bytes);
68 ds_config->mutable_for_testing()->set_max_messages_per_second(messages_per_s);
69
70 helper.StartTracing(trace_config);
71 helper.WaitForProducerEnabled();
72
73 uint64_t wall_start_ns = static_cast<uint64_t>(base::GetWallTimeNs().count());
74 uint64_t service_start_ns =
75 helper.service_thread()->GetThreadCPUTimeNsForTesting();
76 uint64_t producer_start_ns =
77 helper.producer_thread()->GetThreadCPUTimeNsForTesting();
78 uint32_t iterations = 0;
79 for (auto _ : state) {
80 auto cname = "produced.and.committed." + std::to_string(iterations++);
81 auto on_produced_and_committed = task_runner.CreateCheckpoint(cname);
82 producer->ProduceEventBatch(helper.WrapTask(on_produced_and_committed));
83 task_runner.RunUntilCheckpoint(cname, time_for_messages_ms);
84 }
85 uint64_t service_ns =
86 helper.service_thread()->GetThreadCPUTimeNsForTesting() -
87 service_start_ns;
88 uint64_t producer_ns =
89 helper.producer_thread()->GetThreadCPUTimeNsForTesting() -
90 producer_start_ns;
91 uint64_t wall_ns =
92 static_cast<uint64_t>(base::GetWallTimeNs().count()) - wall_start_ns;
93
94 state.counters["Ser CPU"] = benchmark::Counter(
95 100.0 * static_cast<double>(service_ns) / static_cast<double>(wall_ns));
96 state.counters["Ser ns/m"] = benchmark::Counter(
97 static_cast<double>(service_ns) / static_cast<double>(message_count));
98 state.counters["Pro CPU"] = benchmark::Counter(
99 100.0 * static_cast<double>(producer_ns) / static_cast<double>(wall_ns));
100 state.SetBytesProcessed(iterations * message_bytes * message_count);
101
102 // Read back the buffer just to check correctness.
103 helper.ReadData();
104 helper.WaitForReadData();
105
106 bool is_first_packet = true;
107 std::minstd_rand0 rnd_engine(kRandomSeed);
108 for (const auto& packet : helper.trace()) {
109 ASSERT_TRUE(packet.has_for_testing());
110 if (is_first_packet) {
111 rnd_engine = std::minstd_rand0(packet.for_testing().seq_value());
112 is_first_packet = false;
113 } else {
114 ASSERT_EQ(packet.for_testing().seq_value(), rnd_engine());
115 }
116 }
117 }
118
BenchmarkConsumer(benchmark::State & state)119 static void BenchmarkConsumer(benchmark::State& state) {
120 base::TestTaskRunner task_runner;
121
122 TestHelper helper(&task_runner);
123 helper.StartServiceIfRequired();
124
125 FakeProducer* producer = helper.ConnectFakeProducer();
126 helper.ConnectConsumer();
127 helper.WaitForConsumerConnect();
128
129 TraceConfig trace_config;
130
131 static const uint32_t kBufferSizeBytes =
132 IsBenchmarkFunctionalOnly() ? 16 * 1024 : 2 * 1024 * 1024;
133 trace_config.add_buffers()->set_size_kb(kBufferSizeBytes / 1024);
134
135 static constexpr uint32_t kRandomSeed = 42;
136 uint32_t message_bytes = static_cast<uint32_t>(state.range(0));
137 uint32_t mb_per_s = static_cast<uint32_t>(state.range(1));
138 bool is_saturated_producer = mb_per_s == 0;
139
140 uint32_t message_count = kBufferSizeBytes / message_bytes;
141 uint32_t messages_per_s = mb_per_s * 1024 * 1024 / message_bytes;
142 uint32_t number_of_batches =
143 is_saturated_producer ? 0 : std::max(1u, message_count / messages_per_s);
144
145 auto* ds_config = trace_config.add_data_sources()->mutable_config();
146 ds_config->set_name("android.perfetto.FakeProducer");
147 ds_config->set_target_buffer(0);
148 ds_config->mutable_for_testing()->set_seed(kRandomSeed);
149 ds_config->mutable_for_testing()->set_message_count(message_count);
150 ds_config->mutable_for_testing()->set_message_size(message_bytes);
151 ds_config->mutable_for_testing()->set_max_messages_per_second(messages_per_s);
152
153 helper.StartTracing(trace_config);
154 helper.WaitForProducerEnabled();
155
156 uint64_t wall_start_ns = static_cast<uint64_t>(base::GetWallTimeNs().count());
157 uint64_t service_start_ns = static_cast<uint64_t>(
158 helper.service_thread()->GetThreadCPUTimeNsForTesting());
159 uint64_t consumer_start_ns =
160 static_cast<uint64_t>(base::GetThreadCPUTimeNs().count());
161 uint64_t read_time_taken_ns = 0;
162
163 uint64_t iterations = 0;
164 uint32_t counter = 0;
165 for (auto _ : state) {
166 auto cname = "produced.and.committed." + std::to_string(iterations++);
167 auto on_produced_and_committed = task_runner.CreateCheckpoint(cname);
168 producer->ProduceEventBatch(helper.WrapTask(on_produced_and_committed));
169
170 if (is_saturated_producer) {
171 // If the producer is running in saturated mode, wait until it flushes
172 // data.
173 task_runner.RunUntilCheckpoint(cname);
174
175 // Then time how long it takes to read back the data.
176 int64_t start = base::GetWallTimeNs().count();
177 helper.ReadData(counter);
178 helper.WaitForReadData(counter++);
179 read_time_taken_ns +=
180 static_cast<uint64_t>(base::GetWallTimeNs().count() - start);
181 } else {
182 // If the producer is not running in saturated mode, every second the
183 // producer will send a batch of data over. Wait for a second before
184 // performing readback; do this for each batch the producer sends.
185 for (uint32_t i = 0; i < number_of_batches; i++) {
186 auto batch_cname = "batch.checkpoint." + std::to_string(counter);
187 auto batch_checkpoint = task_runner.CreateCheckpoint(batch_cname);
188 task_runner.PostDelayedTask(batch_checkpoint, 1000);
189 task_runner.RunUntilCheckpoint(batch_cname);
190
191 int64_t start = base::GetWallTimeNs().count();
192 helper.ReadData(counter);
193 helper.WaitForReadData(counter++);
194 read_time_taken_ns +=
195 static_cast<uint64_t>(base::GetWallTimeNs().count() - start);
196 }
197 }
198 }
199 uint64_t service_ns =
200 helper.service_thread()->GetThreadCPUTimeNsForTesting() -
201 service_start_ns;
202 uint64_t consumer_ns =
203 static_cast<uint64_t>(base::GetThreadCPUTimeNs().count()) -
204 consumer_start_ns;
205 uint64_t wall_ns =
206 static_cast<uint64_t>(base::GetWallTimeNs().count()) - wall_start_ns;
207
208 state.counters["Ser CPU"] = benchmark::Counter(
209 100.0 * static_cast<double>(service_ns) / static_cast<double>(wall_ns));
210 state.counters["Ser ns/m"] =
211 benchmark::Counter(1.0 * static_cast<double>(service_ns) /
212 static_cast<double>(message_count));
213 state.counters["Con CPU"] = benchmark::Counter(
214 100.0 * static_cast<double>(consumer_ns) / static_cast<double>(wall_ns));
215 state.counters["Con Speed"] =
216 benchmark::Counter(static_cast<double>(iterations) * 1000.0 * 1000.0 *
217 1000.0 * static_cast<double>(kBufferSizeBytes) /
218 static_cast<double>(read_time_taken_ns));
219 }
220
SaturateCpuProducerArgs(benchmark::internal::Benchmark * b)221 void SaturateCpuProducerArgs(benchmark::internal::Benchmark* b) {
222 int min_message_count = 16;
223 int max_message_count = IsBenchmarkFunctionalOnly() ? 16 : 1024 * 1024;
224 int min_payload = 8;
225 int max_payload = IsBenchmarkFunctionalOnly() ? 8 : 2048;
226 for (int count = min_message_count; count <= max_message_count; count *= 2) {
227 for (int bytes = min_payload; bytes <= max_payload; bytes *= 2) {
228 b->Args({count, bytes, 0 /* speed */});
229 }
230 }
231 }
232
ConstantRateProducerArgs(benchmark::internal::Benchmark * b)233 void ConstantRateProducerArgs(benchmark::internal::Benchmark* b) {
234 int message_count = IsBenchmarkFunctionalOnly() ? 2 * 1024 : 128 * 1024;
235 int min_speed = IsBenchmarkFunctionalOnly() ? 128 : 8;
236 int max_speed = 128;
237 for (int speed = min_speed; speed <= max_speed; speed *= 2) {
238 b->Args({message_count, 128, speed});
239 b->Args({message_count, 256, speed});
240 }
241 }
242
SaturateCpuConsumerArgs(benchmark::internal::Benchmark * b)243 void SaturateCpuConsumerArgs(benchmark::internal::Benchmark* b) {
244 int min_payload = 8;
245 int max_payload = IsBenchmarkFunctionalOnly() ? 8 : 64 * 1024;
246 for (int bytes = min_payload; bytes <= max_payload; bytes *= 2) {
247 b->Args({bytes, 0 /* speed */});
248 }
249 }
250
ConstantRateConsumerArgs(benchmark::internal::Benchmark * b)251 void ConstantRateConsumerArgs(benchmark::internal::Benchmark* b) {
252 int min_speed = IsBenchmarkFunctionalOnly() ? 128 : 1;
253 int max_speed = IsBenchmarkFunctionalOnly() ? 128 : 2;
254 for (int speed = min_speed; speed <= max_speed; speed *= 2) {
255 b->Args({2, speed});
256 b->Args({4, speed});
257 }
258 }
259
260 } // namespace
261
BM_EndToEnd_Producer_SaturateCpu(benchmark::State & state)262 static void BM_EndToEnd_Producer_SaturateCpu(benchmark::State& state) {
263 BenchmarkProducer(state);
264 }
265
266 BENCHMARK(BM_EndToEnd_Producer_SaturateCpu)
267 ->Unit(benchmark::kMicrosecond)
268 ->UseRealTime()
269 ->Apply(SaturateCpuProducerArgs);
270
BM_EndToEnd_Producer_ConstantRate(benchmark::State & state)271 static void BM_EndToEnd_Producer_ConstantRate(benchmark::State& state) {
272 BenchmarkProducer(state);
273 }
274
275 BENCHMARK(BM_EndToEnd_Producer_ConstantRate)
276 ->Unit(benchmark::kMicrosecond)
277 ->UseRealTime()
278 ->Apply(ConstantRateProducerArgs);
279
BM_EndToEnd_Consumer_SaturateCpu(benchmark::State & state)280 static void BM_EndToEnd_Consumer_SaturateCpu(benchmark::State& state) {
281 BenchmarkConsumer(state);
282 }
283
284 BENCHMARK(BM_EndToEnd_Consumer_SaturateCpu)
285 ->Unit(benchmark::kMicrosecond)
286 ->UseRealTime()
287 ->Apply(SaturateCpuConsumerArgs);
288
BM_EndToEnd_Consumer_ConstantRate(benchmark::State & state)289 static void BM_EndToEnd_Consumer_ConstantRate(benchmark::State& state) {
290 BenchmarkConsumer(state);
291 }
292
293 BENCHMARK(BM_EndToEnd_Consumer_ConstantRate)
294 ->Unit(benchmark::kMillisecond)
295 ->UseRealTime()
296 ->Apply(ConstantRateConsumerArgs);
297
298 } // namespace perfetto
299