xref: /aosp_15_r20/external/perfetto/test/end_to_end_benchmark.cc (revision 6dbdd20afdafa5e3ca9b8809fa73465d530080dc)
1 // Copyright (C) 2018 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <random>
16 
17 #include <benchmark/benchmark.h>
18 
19 #include "perfetto/base/time.h"
20 #include "perfetto/ext/traced/traced.h"
21 #include "perfetto/ext/tracing/core/trace_packet.h"
22 #include "perfetto/tracing/core/trace_config.h"
23 #include "src/base/test/test_task_runner.h"
24 #include "test/gtest_and_gmock.h"
25 #include "test/test_helper.h"
26 
27 #include "protos/perfetto/config/test_config.gen.h"
28 #include "protos/perfetto/trace/test_event.gen.h"
29 #include "protos/perfetto/trace/trace_packet.pbzero.h"
30 
31 namespace perfetto {
32 
33 namespace {
34 
// Returns true when the BENCHMARK_FUNCTIONAL_TEST_ONLY environment variable is
// present (its value, even an empty one, is not inspected). The argument
// generators and BenchmarkConsumer in this file use it to select much smaller
// parameter ranges.
bool IsBenchmarkFunctionalOnly() {
  const char* const env_value = getenv("BENCHMARK_FUNCTIONAL_TEST_ONLY");
  return env_value != nullptr;
}
38 
BenchmarkProducer(benchmark::State & state)39 void BenchmarkProducer(benchmark::State& state) {
40   base::TestTaskRunner task_runner;
41 
42   TestHelper helper(&task_runner);
43   helper.StartServiceIfRequired();
44 
45   FakeProducer* producer = helper.ConnectFakeProducer();
46   helper.ConnectConsumer();
47   helper.WaitForConsumerConnect();
48 
49   TraceConfig trace_config;
50   trace_config.add_buffers()->set_size_kb(512);
51 
52   auto* ds_config = trace_config.add_data_sources()->mutable_config();
53   ds_config->set_name("android.perfetto.FakeProducer");
54   ds_config->set_target_buffer(0);
55 
56   static constexpr uint32_t kRandomSeed = 42;
57   uint32_t message_count = static_cast<uint32_t>(state.range(0));
58   uint32_t message_bytes = static_cast<uint32_t>(state.range(1));
59   uint32_t mb_per_s = static_cast<uint32_t>(state.range(2));
60 
61   uint32_t messages_per_s = mb_per_s * 1024 * 1024 / message_bytes;
62   uint32_t time_for_messages_ms =
63       10000 + (messages_per_s == 0 ? 0 : message_count * 1000 / messages_per_s);
64 
65   ds_config->mutable_for_testing()->set_seed(kRandomSeed);
66   ds_config->mutable_for_testing()->set_message_count(message_count);
67   ds_config->mutable_for_testing()->set_message_size(message_bytes);
68   ds_config->mutable_for_testing()->set_max_messages_per_second(messages_per_s);
69 
70   helper.StartTracing(trace_config);
71   helper.WaitForProducerEnabled();
72 
73   uint64_t wall_start_ns = static_cast<uint64_t>(base::GetWallTimeNs().count());
74   uint64_t service_start_ns =
75       helper.service_thread()->GetThreadCPUTimeNsForTesting();
76   uint64_t producer_start_ns =
77       helper.producer_thread()->GetThreadCPUTimeNsForTesting();
78   uint32_t iterations = 0;
79   for (auto _ : state) {
80     auto cname = "produced.and.committed." + std::to_string(iterations++);
81     auto on_produced_and_committed = task_runner.CreateCheckpoint(cname);
82     producer->ProduceEventBatch(helper.WrapTask(on_produced_and_committed));
83     task_runner.RunUntilCheckpoint(cname, time_for_messages_ms);
84   }
85   uint64_t service_ns =
86       helper.service_thread()->GetThreadCPUTimeNsForTesting() -
87       service_start_ns;
88   uint64_t producer_ns =
89       helper.producer_thread()->GetThreadCPUTimeNsForTesting() -
90       producer_start_ns;
91   uint64_t wall_ns =
92       static_cast<uint64_t>(base::GetWallTimeNs().count()) - wall_start_ns;
93 
94   state.counters["Ser CPU"] = benchmark::Counter(
95       100.0 * static_cast<double>(service_ns) / static_cast<double>(wall_ns));
96   state.counters["Ser ns/m"] = benchmark::Counter(
97       static_cast<double>(service_ns) / static_cast<double>(message_count));
98   state.counters["Pro CPU"] = benchmark::Counter(
99       100.0 * static_cast<double>(producer_ns) / static_cast<double>(wall_ns));
100   state.SetBytesProcessed(iterations * message_bytes * message_count);
101 
102   // Read back the buffer just to check correctness.
103   helper.ReadData();
104   helper.WaitForReadData();
105 
106   bool is_first_packet = true;
107   std::minstd_rand0 rnd_engine(kRandomSeed);
108   for (const auto& packet : helper.trace()) {
109     ASSERT_TRUE(packet.has_for_testing());
110     if (is_first_packet) {
111       rnd_engine = std::minstd_rand0(packet.for_testing().seq_value());
112       is_first_packet = false;
113     } else {
114       ASSERT_EQ(packet.for_testing().seq_value(), rnd_engine());
115     }
116   }
117 }
118 
BenchmarkConsumer(benchmark::State & state)119 static void BenchmarkConsumer(benchmark::State& state) {
120   base::TestTaskRunner task_runner;
121 
122   TestHelper helper(&task_runner);
123   helper.StartServiceIfRequired();
124 
125   FakeProducer* producer = helper.ConnectFakeProducer();
126   helper.ConnectConsumer();
127   helper.WaitForConsumerConnect();
128 
129   TraceConfig trace_config;
130 
131   static const uint32_t kBufferSizeBytes =
132       IsBenchmarkFunctionalOnly() ? 16 * 1024 : 2 * 1024 * 1024;
133   trace_config.add_buffers()->set_size_kb(kBufferSizeBytes / 1024);
134 
135   static constexpr uint32_t kRandomSeed = 42;
136   uint32_t message_bytes = static_cast<uint32_t>(state.range(0));
137   uint32_t mb_per_s = static_cast<uint32_t>(state.range(1));
138   bool is_saturated_producer = mb_per_s == 0;
139 
140   uint32_t message_count = kBufferSizeBytes / message_bytes;
141   uint32_t messages_per_s = mb_per_s * 1024 * 1024 / message_bytes;
142   uint32_t number_of_batches =
143       is_saturated_producer ? 0 : std::max(1u, message_count / messages_per_s);
144 
145   auto* ds_config = trace_config.add_data_sources()->mutable_config();
146   ds_config->set_name("android.perfetto.FakeProducer");
147   ds_config->set_target_buffer(0);
148   ds_config->mutable_for_testing()->set_seed(kRandomSeed);
149   ds_config->mutable_for_testing()->set_message_count(message_count);
150   ds_config->mutable_for_testing()->set_message_size(message_bytes);
151   ds_config->mutable_for_testing()->set_max_messages_per_second(messages_per_s);
152 
153   helper.StartTracing(trace_config);
154   helper.WaitForProducerEnabled();
155 
156   uint64_t wall_start_ns = static_cast<uint64_t>(base::GetWallTimeNs().count());
157   uint64_t service_start_ns = static_cast<uint64_t>(
158       helper.service_thread()->GetThreadCPUTimeNsForTesting());
159   uint64_t consumer_start_ns =
160       static_cast<uint64_t>(base::GetThreadCPUTimeNs().count());
161   uint64_t read_time_taken_ns = 0;
162 
163   uint64_t iterations = 0;
164   uint32_t counter = 0;
165   for (auto _ : state) {
166     auto cname = "produced.and.committed." + std::to_string(iterations++);
167     auto on_produced_and_committed = task_runner.CreateCheckpoint(cname);
168     producer->ProduceEventBatch(helper.WrapTask(on_produced_and_committed));
169 
170     if (is_saturated_producer) {
171       // If the producer is running in saturated mode, wait until it flushes
172       // data.
173       task_runner.RunUntilCheckpoint(cname);
174 
175       // Then time how long it takes to read back the data.
176       int64_t start = base::GetWallTimeNs().count();
177       helper.ReadData(counter);
178       helper.WaitForReadData(counter++);
179       read_time_taken_ns +=
180           static_cast<uint64_t>(base::GetWallTimeNs().count() - start);
181     } else {
182       // If the producer is not running in saturated mode, every second the
183       // producer will send a batch of data over. Wait for a second before
184       // performing readback; do this for each batch the producer sends.
185       for (uint32_t i = 0; i < number_of_batches; i++) {
186         auto batch_cname = "batch.checkpoint." + std::to_string(counter);
187         auto batch_checkpoint = task_runner.CreateCheckpoint(batch_cname);
188         task_runner.PostDelayedTask(batch_checkpoint, 1000);
189         task_runner.RunUntilCheckpoint(batch_cname);
190 
191         int64_t start = base::GetWallTimeNs().count();
192         helper.ReadData(counter);
193         helper.WaitForReadData(counter++);
194         read_time_taken_ns +=
195             static_cast<uint64_t>(base::GetWallTimeNs().count() - start);
196       }
197     }
198   }
199   uint64_t service_ns =
200       helper.service_thread()->GetThreadCPUTimeNsForTesting() -
201       service_start_ns;
202   uint64_t consumer_ns =
203       static_cast<uint64_t>(base::GetThreadCPUTimeNs().count()) -
204       consumer_start_ns;
205   uint64_t wall_ns =
206       static_cast<uint64_t>(base::GetWallTimeNs().count()) - wall_start_ns;
207 
208   state.counters["Ser CPU"] = benchmark::Counter(
209       100.0 * static_cast<double>(service_ns) / static_cast<double>(wall_ns));
210   state.counters["Ser ns/m"] =
211       benchmark::Counter(1.0 * static_cast<double>(service_ns) /
212                          static_cast<double>(message_count));
213   state.counters["Con CPU"] = benchmark::Counter(
214       100.0 * static_cast<double>(consumer_ns) / static_cast<double>(wall_ns));
215   state.counters["Con Speed"] =
216       benchmark::Counter(static_cast<double>(iterations) * 1000.0 * 1000.0 *
217                          1000.0 * static_cast<double>(kBufferSizeBytes) /
218                          static_cast<double>(read_time_taken_ns));
219 }
220 
SaturateCpuProducerArgs(benchmark::internal::Benchmark * b)221 void SaturateCpuProducerArgs(benchmark::internal::Benchmark* b) {
222   int min_message_count = 16;
223   int max_message_count = IsBenchmarkFunctionalOnly() ? 16 : 1024 * 1024;
224   int min_payload = 8;
225   int max_payload = IsBenchmarkFunctionalOnly() ? 8 : 2048;
226   for (int count = min_message_count; count <= max_message_count; count *= 2) {
227     for (int bytes = min_payload; bytes <= max_payload; bytes *= 2) {
228       b->Args({count, bytes, 0 /* speed */});
229     }
230   }
231 }
232 
ConstantRateProducerArgs(benchmark::internal::Benchmark * b)233 void ConstantRateProducerArgs(benchmark::internal::Benchmark* b) {
234   int message_count = IsBenchmarkFunctionalOnly() ? 2 * 1024 : 128 * 1024;
235   int min_speed = IsBenchmarkFunctionalOnly() ? 128 : 8;
236   int max_speed = 128;
237   for (int speed = min_speed; speed <= max_speed; speed *= 2) {
238     b->Args({message_count, 128, speed});
239     b->Args({message_count, 256, speed});
240   }
241 }
242 
SaturateCpuConsumerArgs(benchmark::internal::Benchmark * b)243 void SaturateCpuConsumerArgs(benchmark::internal::Benchmark* b) {
244   int min_payload = 8;
245   int max_payload = IsBenchmarkFunctionalOnly() ? 8 : 64 * 1024;
246   for (int bytes = min_payload; bytes <= max_payload; bytes *= 2) {
247     b->Args({bytes, 0 /* speed */});
248   }
249 }
250 
ConstantRateConsumerArgs(benchmark::internal::Benchmark * b)251 void ConstantRateConsumerArgs(benchmark::internal::Benchmark* b) {
252   int min_speed = IsBenchmarkFunctionalOnly() ? 128 : 1;
253   int max_speed = IsBenchmarkFunctionalOnly() ? 128 : 2;
254   for (int speed = min_speed; speed <= max_speed; speed *= 2) {
255     b->Args({2, speed});
256     b->Args({4, speed});
257   }
258 }
259 
260 }  // namespace
261 
BM_EndToEnd_Producer_SaturateCpu(benchmark::State & state)262 static void BM_EndToEnd_Producer_SaturateCpu(benchmark::State& state) {
263   BenchmarkProducer(state);
264 }
265 
266 BENCHMARK(BM_EndToEnd_Producer_SaturateCpu)
267     ->Unit(benchmark::kMicrosecond)
268     ->UseRealTime()
269     ->Apply(SaturateCpuProducerArgs);
270 
BM_EndToEnd_Producer_ConstantRate(benchmark::State & state)271 static void BM_EndToEnd_Producer_ConstantRate(benchmark::State& state) {
272   BenchmarkProducer(state);
273 }
274 
275 BENCHMARK(BM_EndToEnd_Producer_ConstantRate)
276     ->Unit(benchmark::kMicrosecond)
277     ->UseRealTime()
278     ->Apply(ConstantRateProducerArgs);
279 
BM_EndToEnd_Consumer_SaturateCpu(benchmark::State & state)280 static void BM_EndToEnd_Consumer_SaturateCpu(benchmark::State& state) {
281   BenchmarkConsumer(state);
282 }
283 
284 BENCHMARK(BM_EndToEnd_Consumer_SaturateCpu)
285     ->Unit(benchmark::kMicrosecond)
286     ->UseRealTime()
287     ->Apply(SaturateCpuConsumerArgs);
288 
BM_EndToEnd_Consumer_ConstantRate(benchmark::State & state)289 static void BM_EndToEnd_Consumer_ConstantRate(benchmark::State& state) {
290   BenchmarkConsumer(state);
291 }
292 
293 BENCHMARK(BM_EndToEnd_Consumer_ConstantRate)
294     ->Unit(benchmark::kMillisecond)
295     ->UseRealTime()
296     ->Apply(ConstantRateConsumerArgs);
297 
298 }  // namespace perfetto
299