xref: /aosp_15_r20/external/google-benchmark/test/perf_counters_gtest.cc (revision dbb99499c3810fa1611fa2242a2fc446be01a57c)
1 #include <random>
2 #include <thread>
3 
4 #include "../src/perf_counters.h"
5 #include "gmock/gmock.h"
6 #include "gtest/gtest.h"
7 
// Older googletest releases lack GTEST_SKIP; emulate it with a sink object
// that swallows the streamed skip message and returns from the test body.
#ifndef GTEST_SKIP
struct MsgHandler {
  // Accept and discard the stream the skip message was written to.
  void operator=(std::ostream&) {}
};
#define GTEST_SKIP() return MsgHandler() = std::cout
#endif
14 
15 using benchmark::internal::PerfCounters;
16 using benchmark::internal::PerfCountersMeasurement;
17 using benchmark::internal::PerfCounterValues;
18 using ::testing::AllOf;
19 using ::testing::Gt;
20 using ::testing::Lt;
21 
22 namespace {
// Generic hardware event names understood by libpfm on all tested platforms.
const char kGenericPerfEvent1[] = "CYCLES";
const char kGenericPerfEvent2[] = "INSTRUCTIONS";
25 
// Initialize() must agree with the compile-time support flag: it succeeds
// exactly when performance counters are available on this platform.
TEST(PerfCountersTest, Init) {
  EXPECT_EQ(PerfCounters::Initialize(), PerfCounters::kSupported);
}
29 
// A single valid event name must yield exactly one live counter.
TEST(PerfCountersTest, OneCounter) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Performance counters not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  auto counters = PerfCounters::Create({kGenericPerfEvent1});
  EXPECT_EQ(counters.num_counters(), 1);
}
37 
// Exercises Create() with invalid and mixed inputs. Create() always builds
// a valid object, even when given no or wrong arguments: unsupported
// counters are warned about and dropped rather than failing the whole set.
TEST(PerfCountersTest, NegativeTest) {
  if (!PerfCounters::kSupported) {
    EXPECT_FALSE(PerfCounters::Initialize());
    return;
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  // Safety checks: no names, an empty name, and a bogus name all yield a
  // valid-but-empty counter set.
  EXPECT_EQ(PerfCounters::Create({}).num_counters(), 0);
  EXPECT_EQ(PerfCounters::Create({""}).num_counters(), 0);
  EXPECT_EQ(PerfCounters::Create({"not a counter name"}).num_counters(), 0);
  {
    // Sneak a bad egg (empty name) into the middle; it must be filtered
    // out, leaving the two valid counters in their original order.
    auto counter =
        PerfCounters::Create({kGenericPerfEvent2, "", kGenericPerfEvent1});
    EXPECT_EQ(counter.num_counters(), 2);
    const std::vector<std::string> expected_mixed{kGenericPerfEvent2,
                                                  kGenericPerfEvent1};
    EXPECT_EQ(counter.names(), expected_mixed);
  }
  {
    // An outrageous counter name (fat-finger mistake) in the middle is
    // dropped the same way.
    auto counter = PerfCounters::Create(
        {kGenericPerfEvent2, "not a counter name", kGenericPerfEvent1});
    EXPECT_EQ(counter.num_counters(), 2);
    const std::vector<std::string> expected_typo{kGenericPerfEvent2,
                                                 kGenericPerfEvent1};
    EXPECT_EQ(counter.names(), expected_typo);
  }
  {
    // Golden input: Create() should accept both names.
    EXPECT_EQ(PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2})
                  .num_counters(),
              2);
  }
  {
    // A bad apple at the end of the chain checks the edge handling.
    auto counter = PerfCounters::Create(
        {kGenericPerfEvent1, kGenericPerfEvent2, "bad event name"});
    EXPECT_EQ(counter.num_counters(), 2);
    const std::vector<std::string> expected_tail{kGenericPerfEvent1,
                                                 kGenericPerfEvent2};
    EXPECT_EQ(counter.names(), expected_tail);
  }
}
83 
// Two consecutive snapshots of a monotonically increasing counter: both
// values must be positive and the second strictly larger than the first.
TEST(PerfCountersTest, Read1Counter) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  auto counters = PerfCounters::Create({kGenericPerfEvent1});
  EXPECT_EQ(counters.num_counters(), 1);
  PerfCounterValues snapshot_a(1);
  EXPECT_TRUE(counters.Snapshot(&snapshot_a));
  EXPECT_GT(snapshot_a[0], 0);
  PerfCounterValues snapshot_b(1);
  EXPECT_TRUE(counters.Snapshot(&snapshot_b));
  EXPECT_GT(snapshot_b[0], 0);
  EXPECT_GT(snapshot_b[0], snapshot_a[0]);
}
99 
// Same as Read1Counter but with a two-event set: every snapshotted value
// must be positive.
TEST(PerfCountersTest, Read2Counters) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  auto counters =
      PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2});
  EXPECT_EQ(counters.num_counters(), 2);
  PerfCounterValues snapshot_a(2);
  EXPECT_TRUE(counters.Snapshot(&snapshot_a));
  EXPECT_GT(snapshot_a[0], 0);
  EXPECT_GT(snapshot_a[1], 0);
  PerfCounterValues snapshot_b(2);
  EXPECT_TRUE(counters.Snapshot(&snapshot_b));
  EXPECT_GT(snapshot_b[0], 0);
  EXPECT_GT(snapshot_b[1], 0);
}
117 
// Opening the same event twice must still allow both objects to snapshot.
// This works on recent and old Intel hardware, Pixel 3, and Pixel 6, but
// we cannot assume more than 2 HW counters because of Pixel 6.
TEST(PerfCountersTest, ReopenExistingCounters) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  std::vector<std::string> kMetrics({kGenericPerfEvent1});
  std::vector<PerfCounters> counters(2);
  for (auto& c : counters) c = PerfCounters::Create(kMetrics);
  PerfCounterValues values(1);
  EXPECT_TRUE(counters[0].Snapshot(&values));
  EXPECT_TRUE(counters[1].Snapshot(&values));
}
134 
TEST(PerfCountersTest,CreateExistingMeasurements)135 TEST(PerfCountersTest, CreateExistingMeasurements) {
136   // The test works (i.e. causes read to fail) for the assumptions
137   // about hardware capabilities (i.e. small number (2) hardware
138   // counters) at this date,
139   // the same as previous test ReopenExistingCounters.
140   if (!PerfCounters::kSupported) {
141     GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
142   }
143   EXPECT_TRUE(PerfCounters::Initialize());
144 
145   // This means we will try 10 counters but we can only guarantee
146   // for sure at this time that only 3 will work. Perhaps in the future
147   // we could use libpfm to query for the hardware limits on this
148   // particular platform.
149   const int kMaxCounters = 10;
150   const int kMinValidCounters = 2;
151 
152   // Let's use a ubiquitous counter that is guaranteed to work
153   // on all platforms
154   const std::vector<std::string> kMetrics{"cycles"};
155 
156   // Cannot create a vector of actual objects because the
157   // copy constructor of PerfCounters is deleted - and so is
158   // implicitly deleted on PerfCountersMeasurement too
159   std::vector<std::unique_ptr<PerfCountersMeasurement>>
160       perf_counter_measurements;
161 
162   perf_counter_measurements.reserve(kMaxCounters);
163   for (int j = 0; j < kMaxCounters; ++j) {
164     perf_counter_measurements.emplace_back(
165         new PerfCountersMeasurement(kMetrics));
166   }
167 
168   std::vector<std::pair<std::string, double>> measurements;
169 
170   // Start all counters together to see if they hold
171   size_t max_counters = kMaxCounters;
172   for (size_t i = 0; i < kMaxCounters; ++i) {
173     auto& counter(*perf_counter_measurements[i]);
174     EXPECT_EQ(counter.num_counters(), 1);
175     if (!counter.Start()) {
176       max_counters = i;
177       break;
178     };
179   }
180 
181   ASSERT_GE(max_counters, kMinValidCounters);
182 
183   // Start all together
184   for (size_t i = 0; i < max_counters; ++i) {
185     auto& counter(*perf_counter_measurements[i]);
186     EXPECT_TRUE(counter.Stop(measurements) || (i >= kMinValidCounters));
187   }
188 
189   // Start/stop individually
190   for (size_t i = 0; i < max_counters; ++i) {
191     auto& counter(*perf_counter_measurements[i]);
192     measurements.clear();
193     counter.Start();
194     EXPECT_TRUE(counter.Stop(measurements) || (i >= kMinValidCounters));
195   }
196 }
197 
198 // We try to do some meaningful work here but the compiler
199 // insists in optimizing away our loop so we had to add a
200 // no-optimize macro. In case it fails, we added some entropy
201 // to this pool as well.
202 
do_work()203 BENCHMARK_DONT_OPTIMIZE size_t do_work() {
204   static std::mt19937 rd{std::random_device{}()};
205   static std::uniform_int_distribution<size_t> mrand(0, 10);
206   const size_t kNumLoops = 1000000;
207   size_t sum = 0;
208   for (size_t j = 0; j < kNumLoops; ++j) {
209     sum += mrand(rd);
210   }
211   benchmark::DoNotOptimize(sum);
212   return sum;
213 }
214 
measure(size_t threadcount,PerfCounterValues * before,PerfCounterValues * after)215 void measure(size_t threadcount, PerfCounterValues* before,
216              PerfCounterValues* after) {
217   BM_CHECK_NE(before, nullptr);
218   BM_CHECK_NE(after, nullptr);
219   std::vector<std::thread> threads(threadcount);
220   auto work = [&]() { BM_CHECK(do_work() > 1000); };
221 
222   // We need to first set up the counters, then start the threads, so the
223   // threads would inherit the counters. But later, we need to first destroy
224   // the thread pool (so all the work finishes), then measure the counters. So
225   // the scopes overlap, and we need to explicitly control the scope of the
226   // threadpool.
227   auto counters =
228       PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2});
229   for (auto& t : threads) t = std::thread(work);
230   counters.Snapshot(before);
231   for (auto& t : threads) t.join();
232   counters.Snapshot(after);
233 }
234 
// Measures combined cycles/instructions for 2 vs 4 worker threads. The
// test also works when tasksetted to a single CPU; the threads then simply
// run sequentially.
TEST(PerfCountersTest, MultiThreaded) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.";
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  PerfCounterValues before(2);
  PerfCounterValues after(2);

  // Combined counts for two threads...
  measure(2, &before, &after);
  const std::vector<double> elapsed2{
      static_cast<double>(after[0] - before[0]),
      static_cast<double>(after[1] - before[1])};

  // ...and for four threads.
  measure(4, &before, &after);
  const std::vector<double> elapsed4{
      static_cast<double>(after[0] - before[0]),
      static_cast<double>(after[1] - before[1])};

  // On beefy workstations with many CPUs, four threads can even complete
  // in fewer events than two, so rather than expecting 4 threads to cost
  // more we only require the two runs to be within 10x of each other.
  EXPECT_THAT(elapsed4[0] / elapsed2[0], AllOf(Gt(0.1), Lt(10)));
  EXPECT_THAT(elapsed4[1] / elapsed2[1], AllOf(Gt(0.1), Lt(10)));
}
268 
// Stresses grouping by opening several hardware events at once. Relies on
// the same hardware assumptions (3-4 HW counters) as ReopenExistingCounters
// at the time of writing.
TEST(PerfCountersTest, HardwareLimits) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  // Taken from `perf list`, restricted to the HW events that were actually
  // reported by `sudo perf stat -a sleep 10`, intersected over several
  // platforms; HW events listed by the first command but missing from the
  // second do not seem to work. Sadly this means we do not really get to
  // test the grouping here (groups can contain up to 6 members)...
  const std::vector<std::string> candidates{
      "cycles",         // leader
      "instructions",   //
      "branch-misses",  //
  };

  // In the off-chance some of these are unsupported, filter them out so
  // the test completes without failure, albeit possibly without actually
  // testing the grouping on that platform.
  std::vector<std::string> valid_names;
  for (const auto& name : candidates) {
    if (PerfCounters::IsCounterSupported(name)) {
      valid_names.push_back(name);
    }
  }
  PerfCountersMeasurement counter(valid_names);

  std::vector<std::pair<std::string, double>> measurements;

  counter.Start();
  EXPECT_TRUE(counter.Stop(measurements));
}
306 
307 }  // namespace
308