#include <gtest/gtest.h>

#include <torch/csrc/profiler/events.h>
#include <torch/csrc/profiler/perf.h>

// Approximates pi with the Leibniz series: pi/4 = 1 - 1/3 + 1/5 - 1/7 + ...
// The accumulator is volatile so the loop cannot be optimized away; the point
// is to burn a predictable amount of CPU work for the profiler to count.
double calc_pi() {
  volatile double pi = 1.0;
  for (int i = 3; i < 100000; i += 2) {
    pi += (((i + 1) >> 1) % 2) ? 1.0 / i : -1.0 / i;
  }
  return pi * 4.0;
}
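
// Sanity check for the work kernel above (a minimal addition, not part of the
// original suite, assuming gtest's ASSERT_NEAR): the truncated Leibniz series
// is within about 4e-5 of pi, so a 1e-3 absolute tolerance is comfortably
// safe.
TEST(ProfilerTest, CalcPiSanity) {
  ASSERT_NEAR(calc_pi(), 3.141592653589793, 1e-3);
}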

TEST(ProfilerTest, LinuxPerf) {
  torch::profiler::impl::linux_perf::PerfProfiler profiler;

  std::vector<std::string> standard_events(
      std::begin(torch::profiler::ProfilerPerfEvents),
      std::end(torch::profiler::ProfilerPerfEvents));
  torch::profiler::perf_counters_t counters;
  counters.resize(standard_events.size(), 0);

  // HACK: wrap the profiler calls in try..catch to absorb a TORCH_CHECK
  // failure, because we don't yet fail gracefully if the underlying syscall
  // fails
  try {
    profiler.Configure(standard_events);

    profiler.Enable();
    [[maybe_unused]] auto pi = calc_pi();
    profiler.Disable(counters);
  } catch (const c10::Error&) {
    // Bail out here if something went wrong during profiling; we don't want
    // that to fail the test
    return;
  } catch (...) {
    // Anything else went wrong and should be reported
    FAIL();
  }

  // If profiling worked we should have counted something, so check that;
  // on platforms where perf is not supported the counters should all be zero
#if defined(__ANDROID__) || defined(__linux__)
  for (auto counter : counters) {
    ASSERT_GT(counter, 0);
  }
#else /* __ANDROID__ || __linux__ */
  for (auto counter : counters) {
    ASSERT_EQ(counter, 0);
  }
#endif /* __ANDROID__ || __linux__ */
}
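
// Optional debugging aid, a hypothetical sketch rather than part of the test
// flow: print each standard event name next to its measured counter value,
// which helps diagnose failures of the counter assertions below on unusual
// kernels or hardware. <iostream> is included here to keep the sketch
// self-contained.
#include <iostream>

[[maybe_unused]] static void dump_counters(
    const std::vector<std::string>& events,
    const torch::profiler::perf_counters_t& counters) {
  for (size_t i = 0; i < events.size() && i < counters.size(); ++i) {
    std::cerr << events[i] << ": " << counters[i] << '\n';
  }
}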

TEST(ProfilerTest, LinuxPerfNestedDepth) {
  torch::profiler::impl::linux_perf::PerfProfiler profiler;

  // Only monotonically increasing events will work here
  std::vector<std::string> standard_events(
      std::begin(torch::profiler::ProfilerPerfEvents),
      std::end(torch::profiler::ProfilerPerfEvents));

  torch::profiler::perf_counters_t counters_A;
  torch::profiler::perf_counters_t counters_B;
  torch::profiler::perf_counters_t counters_C;

  counters_A.resize(standard_events.size(), 0);
  counters_B.resize(standard_events.size(), 0);
  counters_C.resize(standard_events.size(), 0);

  // HACK: wrap the profiler calls in try..catch to absorb a TORCH_CHECK
  // failure, because we don't yet fail gracefully if the underlying syscall
  // fails
  try {
    profiler.Configure(standard_events);

    // * = work kernel calc_pi()
    //
    // A --*---+              +--*-- A
    //         |              |
    //         |              |
    // B       +-*--+    +--*-+ B
    //              |    |
    //              |    |
    // C            +-*--+ C
    //

    profiler.Enable();
    [[maybe_unused]] auto A = calc_pi();

    profiler.Enable();
    [[maybe_unused]] auto B = calc_pi();

    profiler.Enable();
    [[maybe_unused]] auto C = calc_pi();
    profiler.Disable(counters_C);

    [[maybe_unused]] auto B2 = calc_pi();
    profiler.Disable(counters_B);

    [[maybe_unused]] auto A2 = calc_pi();
    profiler.Disable(counters_A);
  } catch (const c10::Error&) {
    // Bail out here if something went wrong during profiling; we don't want
    // that to fail the test
    return;
  } catch (...) {
    // Anything else went wrong and should be reported
    FAIL();
  }

  // For each counter: C saw one work kernel, B saw three (B, C, B2), and A
  // saw all five, so expect A > B > C and A > B + C
#if defined(__ANDROID__) || defined(__linux__)
  for (size_t i = 0; i < standard_events.size(); ++i) {
    ASSERT_GT(counters_A[i], counters_B[i]);
    ASSERT_GT(counters_A[i], counters_C[i]);
    ASSERT_GT(counters_B[i], counters_C[i]);
    ASSERT_GT(counters_A[i], counters_B[i] + counters_C[i]);
  }
#else /* __ANDROID__ || __linux__ */
  for (size_t i = 0; i < standard_events.size(); ++i) {
    ASSERT_EQ(counters_A[i], 0);
    ASSERT_EQ(counters_B[i], 0);
    ASSERT_EQ(counters_C[i], 0);
  }
#endif /* __ANDROID__ || __linux__ */
}

TEST(ProfilerTest, LinuxPerfNestedMultiple) {
  torch::profiler::impl::linux_perf::PerfProfiler profiler;

  // Only monotonically increasing events will work here
  std::vector<std::string> standard_events(
      std::begin(torch::profiler::ProfilerPerfEvents),
      std::end(torch::profiler::ProfilerPerfEvents));

  torch::profiler::perf_counters_t counters_A;
  torch::profiler::perf_counters_t counters_B;
  torch::profiler::perf_counters_t counters_C;

  counters_A.resize(standard_events.size(), 0);
  counters_B.resize(standard_events.size(), 0);
  counters_C.resize(standard_events.size(), 0);

  // HACK: wrap the profiler calls in try..catch to absorb a TORCH_CHECK
  // failure, because we don't yet fail gracefully if the underlying syscall
  // fails
  try {
    profiler.Configure(standard_events);

    // * = work kernel calc_pi()
    //
    // A --*---+    +---*----+    +--*-- A
    //         |    |        |    |
    //         |    |        |    |
    // B       +-**-+ B    C +-*--+ C

    profiler.Enable();
    [[maybe_unused]] auto A1 = calc_pi();

    profiler.Enable();
    [[maybe_unused]] auto B1 = calc_pi();
    [[maybe_unused]] auto B2 = calc_pi();
    profiler.Disable(counters_B);

    [[maybe_unused]] auto A2 = calc_pi();

    profiler.Enable();
    [[maybe_unused]] auto C1 = calc_pi();
    profiler.Disable(counters_C);

    [[maybe_unused]] auto A3 = calc_pi();
    profiler.Disable(counters_A);
  } catch (const c10::Error&) {
    // Bail out here if something went wrong during profiling; we don't want
    // that to fail the test
    return;
  } catch (...) {
    // Anything else went wrong and should be reported
    FAIL();
  }

  // For each counter: C saw one work kernel, B saw two (B1, B2), and A saw
  // all six, so expect A > B > C and A > B + C
#if defined(__ANDROID__) || defined(__linux__)
  for (size_t i = 0; i < standard_events.size(); ++i) {
    ASSERT_GT(counters_A[i], counters_B[i]);
    ASSERT_GT(counters_A[i], counters_C[i]);
    ASSERT_GT(counters_B[i], counters_C[i]);
    ASSERT_GT(counters_A[i], counters_B[i] + counters_C[i]);
  }
#else /* __ANDROID__ || __linux__ */
  for (size_t i = 0; i < standard_events.size(); ++i) {
    ASSERT_EQ(counters_A[i], 0);
    ASSERT_EQ(counters_B[i], 0);
    ASSERT_EQ(counters_C[i], 0);
  }
#endif /* __ANDROID__ || __linux__ */
}

TEST(ProfilerTest, LinuxPerfNestedSingle) {
  torch::profiler::impl::linux_perf::PerfProfiler profiler;

  // Only monotonically increasing events will work here
  std::vector<std::string> standard_events(
      std::begin(torch::profiler::ProfilerPerfEvents),
      std::end(torch::profiler::ProfilerPerfEvents));

  torch::profiler::perf_counters_t counters_A;
  torch::profiler::perf_counters_t counters_B;
  torch::profiler::perf_counters_t counters_C;

  counters_A.resize(standard_events.size(), 0);
  counters_B.resize(standard_events.size(), 0);
  counters_C.resize(standard_events.size(), 0);

  // HACK: wrap the profiler calls in try..catch to absorb a TORCH_CHECK
  // failure, because we don't yet fail gracefully if the underlying syscall
  // fails
  try {
    profiler.Configure(standard_events);

    profiler.Enable();
    profiler.Enable();
    profiler.Enable();
    [[maybe_unused]] auto A1 = calc_pi();
    profiler.Disable(counters_C);
    profiler.Disable(counters_B);
    profiler.Disable(counters_A);
  } catch (const c10::Error&) {
    // Bail out here if something went wrong during profiling; we don't want
    // that to fail the test
    return;
  } catch (...) {
    // Anything else went wrong and should be reported
    FAIL();
  }

  // All three nesting levels cover the same single work kernel; the outer
  // levels additionally include the bookkeeping of the inner Enable/Disable
  // calls, so expect A >= B >= C for each counter
#if defined(__ANDROID__) || defined(__linux__)
  for (size_t i = 0; i < standard_events.size(); ++i) {
    ASSERT_GE(counters_A[i], counters_B[i]);
    ASSERT_GE(counters_A[i], counters_C[i]);
    ASSERT_GE(counters_B[i], counters_C[i]);
  }
#else /* __ANDROID__ || __linux__ */
  for (size_t i = 0; i < standard_events.size(); ++i) {
    ASSERT_EQ(counters_A[i], 0);
    ASSERT_EQ(counters_B[i], 0);
    ASSERT_EQ(counters_C[i], 0);
  }
#endif /* __ANDROID__ || __linux__ */
}
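
// If this file is built without linking gtest_main (an assumption; the
// original build may already provide an entry point, in which case this is
// redundant), a standard GoogleTest runner would look like:
int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}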