#include <gtest/gtest.h>

#include <torch/csrc/profiler/events.h>
#include <torch/csrc/profiler/perf.h>

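// Work kernel used by all tests below: approximates pi with the Leibniz
// series (pi/4 = 1 - 1/3 + 1/5 - 1/7 + ...). The volatile accumulator keeps
// the compiler from folding the loop away, so each call performs a fixed,
// measurable amount of CPU work for the perf counters to observe.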
double calc_pi() {
  volatile double pi = 1.0;
  for (int i = 3; i < 100000; i += 2) {
    pi += (((i + 1) >> 1) % 2) ? 1.0 / i : -1.0 / i;
  }
  return pi * 4.0;
}

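// Smoke test: configure the standard perf events, profile a single run of the
// work kernel, and check that every counter advanced on platforms where perf
// profiling is available (and stayed zero everywhere else).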
TEST(ProfilerTest, LinuxPerf) {
  torch::profiler::impl::linux_perf::PerfProfiler profiler;

  std::vector<std::string> standard_events(
      std::begin(torch::profiler::ProfilerPerfEvents),
      std::end(torch::profiler::ProfilerPerfEvents));
  torch::profiler::perf_counters_t counters;
  counters.resize(standard_events.size(), 0);

  // HACK: use try..catch to absorb TORCH_CHECK failures, because we don't
  // yet fail gracefully if the underlying syscall fails
  try {
    profiler.Configure(standard_events);

    profiler.Enable();
    auto pi = calc_pi();
    profiler.Disable(counters);
  } catch (const c10::Error&) {
    // Bail out if something went wrong during the profiling itself; we don't
    // want that to make the test fail
    return;
  } catch (...) {
    // Something else went wrong - this should be reported
    FAIL();
  }

  // If profiling worked we should have counted something, so test for that;
  // on platforms where it's not supported the counters should all be zero.
#if defined(__ANDROID__) || defined(__linux__)
  for (auto counter : counters) {
    ASSERT_GT(counter, 0);
  }
#else /* __ANDROID__ || __linux__ */
  for (auto counter : counters) {
    ASSERT_EQ(counter, 0);
  }
#endif /* __ANDROID__ || __linux__ */
}

TEST(ProfilerTest, LinuxPerfNestedDepth) {
  torch::profiler::impl::linux_perf::PerfProfiler profiler;

  // Only monotonically increasing events will work
  std::vector<std::string> standard_events(
      std::begin(torch::profiler::ProfilerPerfEvents),
      std::end(torch::profiler::ProfilerPerfEvents));

  torch::profiler::perf_counters_t counters_A;
  torch::profiler::perf_counters_t counters_B;
  torch::profiler::perf_counters_t counters_C;

  counters_A.resize(standard_events.size(), 0);
  counters_B.resize(standard_events.size(), 0);
  counters_C.resize(standard_events.size(), 0);

  // HACK: use try..catch to absorb TORCH_CHECK failures, because we don't
  // yet fail gracefully if the underlying syscall fails
  try {
    profiler.Configure(standard_events);

    // * = work kernel calc_pi()
    //
    // A --*---+              +--*-- A
    //         |              |
    //         |              |
    //       B +-*--+    +--*-+ B
    //              |    |
    //              |    |
    //            C +-*--+ C
    //

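    // Enable/Disable pairs nest like a stack: each Disable closes the
    // innermost open Enable and reports the counts accumulated since it.
    // Window A therefore contains everything B and C counted plus two
    // kernels of its own, so per event we expect A > B > C and A > B + C.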
    profiler.Enable();
    auto A = calc_pi();

    profiler.Enable();
    auto B = calc_pi();

    profiler.Enable();
    auto C = calc_pi();
    profiler.Disable(counters_C);

    auto B2 = calc_pi();
    profiler.Disable(counters_B);

    auto A2 = calc_pi();
    profiler.Disable(counters_A);
  } catch (const c10::Error&) {
    // Bail out if something went wrong during the profiling itself; we don't
    // want that to make the test fail
    return;
  } catch (...) {
    // Something else went wrong - this should be reported
    FAIL();
  }

// For each counter, assert A > B > C; also A > B + C, since A's window
// contains both nested windows plus its own work
#if defined(__ANDROID__) || defined(__linux__)
  for (size_t i = 0; i < standard_events.size(); ++i) {
    ASSERT_GT(counters_A[i], counters_B[i]);
    ASSERT_GT(counters_A[i], counters_C[i]);
    ASSERT_GT(counters_B[i], counters_C[i]);
    ASSERT_GT(counters_A[i], counters_B[i] + counters_C[i]);
  }
#else /* __ANDROID__ || __linux__ */
  for (size_t i = 0; i < standard_events.size(); ++i) {
    ASSERT_EQ(counters_A[i], 0);
    ASSERT_EQ(counters_B[i], 0);
    ASSERT_EQ(counters_C[i], 0);
  }
#endif /* __ANDROID__ || __linux__ */
}

TEST(ProfilerTest, LinuxPerfNestedMultiple) {
  torch::profiler::impl::linux_perf::PerfProfiler profiler;

  // Only monotonically increasing events will work
  std::vector<std::string> standard_events(
      std::begin(torch::profiler::ProfilerPerfEvents),
      std::end(torch::profiler::ProfilerPerfEvents));

  torch::profiler::perf_counters_t counters_A;
  torch::profiler::perf_counters_t counters_B;
  torch::profiler::perf_counters_t counters_C;

  counters_A.resize(standard_events.size(), 0);
  counters_B.resize(standard_events.size(), 0);
  counters_C.resize(standard_events.size(), 0);

  // HACK: use try..catch to absorb TORCH_CHECK failures, because we don't
  // yet fail gracefully if the underlying syscall fails
  try {
    profiler.Configure(standard_events);

    // * = work kernel calc_pi()
    //
    // A --*---+    +---*----+    +--*-- A
    //         |    |        |    |
    //         |    |        |    |
    //      B  +-**-+ B    C +-*--+ C

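    // Sibling windows at the same depth: B's window covers two kernel runs,
    // C's covers one, and A's covers all of those plus three of its own, so
    // per event we again expect A > B > C and A > B + C.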
    profiler.Enable();
    auto A1 = calc_pi();

    profiler.Enable();
    auto B1 = calc_pi();
    auto B2 = calc_pi();
    profiler.Disable(counters_B);

    auto A2 = calc_pi();

    profiler.Enable();
    auto C1 = calc_pi();
    profiler.Disable(counters_C);

    auto A3 = calc_pi();
    profiler.Disable(counters_A);
  } catch (const c10::Error&) {
    // Bail out if something went wrong during the profiling itself; we don't
    // want that to make the test fail
    return;
  } catch (...) {
    // Something else went wrong - this should be reported
    FAIL();
  }

// For each counter, assert A > B > C; also A > B + C, since A's window
// contains both nested windows plus its own work
#if defined(__ANDROID__) || defined(__linux__)
  for (size_t i = 0; i < standard_events.size(); ++i) {
    ASSERT_GT(counters_A[i], counters_B[i]);
    ASSERT_GT(counters_A[i], counters_C[i]);
    ASSERT_GT(counters_B[i], counters_C[i]);
    ASSERT_GT(counters_A[i], counters_B[i] + counters_C[i]);
  }
#else /* __ANDROID__ || __linux__ */
  for (size_t i = 0; i < standard_events.size(); ++i) {
    ASSERT_EQ(counters_A[i], 0);
    ASSERT_EQ(counters_B[i], 0);
    ASSERT_EQ(counters_C[i], 0);
  }
#endif /* __ANDROID__ || __linux__ */
}

TEST(ProfilerTest, LinuxPerfNestedSingle) {
  torch::profiler::impl::linux_perf::PerfProfiler profiler;

  // Only monotonically increasing events will work
  std::vector<std::string> standard_events(
      std::begin(torch::profiler::ProfilerPerfEvents),
      std::end(torch::profiler::ProfilerPerfEvents));

  torch::profiler::perf_counters_t counters_A;
  torch::profiler::perf_counters_t counters_B;
  torch::profiler::perf_counters_t counters_C;

  counters_A.resize(standard_events.size(), 0);
  counters_B.resize(standard_events.size(), 0);
  counters_C.resize(standard_events.size(), 0);

  // HACK: use try..catch to absorb TORCH_CHECK failures, because we don't
  // yet fail gracefully if the underlying syscall fails
  try {
    profiler.Configure(standard_events);

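    // All three nested windows wrap the same single kernel run; the outer
    // windows add only the profiler's own Enable/Disable overhead, which is
    // why the assertions below use >= rather than >.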
    profiler.Enable();
    profiler.Enable();
    profiler.Enable();
    auto A1 = calc_pi();
    profiler.Disable(counters_C);
    profiler.Disable(counters_B);
    profiler.Disable(counters_A);
  } catch (const c10::Error&) {
    // Bail out if something went wrong during the profiling itself; we don't
    // want that to make the test fail
    return;
  } catch (...) {
    // Something else went wrong - this should be reported
    FAIL();
  }

// For each counter, assert A >= B >= C: all three windows wrap the same
// single kernel run and differ only by profiler overhead
#if defined(__ANDROID__) || defined(__linux__)
  for (size_t i = 0; i < standard_events.size(); ++i) {
    ASSERT_GE(counters_A[i], counters_B[i]);
    ASSERT_GE(counters_A[i], counters_C[i]);
    ASSERT_GE(counters_B[i], counters_C[i]);
  }
#else /* __ANDROID__ || __linux__ */
  for (size_t i = 0; i < standard_events.size(); ++i) {
    ASSERT_EQ(counters_A[i], 0);
    ASSERT_EQ(counters_B[i], 0);
    ASSERT_EQ(counters_C[i], 0);
  }
#endif /* __ANDROID__ || __linux__ */
}