1 // Copyright 2021 Google Inc. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "perf_counters.h"
16
17 #include <cstring>
18 #include <memory>
19 #include <vector>
20
21 #if defined HAVE_LIBPFM
22 #include "perfmon/pfmlib.h"
23 #include "perfmon/pfmlib_perf_event.h"
24 #endif
25
26 namespace benchmark {
27 namespace internal {
28
29 constexpr size_t PerfCounterValues::kMaxCounters;
30
31 #if defined HAVE_LIBPFM
32
Read(const std::vector<int> & leaders)33 size_t PerfCounterValues::Read(const std::vector<int>& leaders) {
34 // Create a pointer for multiple reads
35 const size_t bufsize = values_.size() * sizeof(values_[0]);
36 char* ptr = reinterpret_cast<char*>(values_.data());
37 size_t size = bufsize;
38 for (int lead : leaders) {
39 auto read_bytes = ::read(lead, ptr, size);
40 if (read_bytes >= ssize_t(sizeof(uint64_t))) {
41 // Actual data bytes are all bytes minus initial padding
42 std::size_t data_bytes =
43 static_cast<std::size_t>(read_bytes) - sizeof(uint64_t);
44 // This should be very cheap since it's in hot cache
45 std::memmove(ptr, ptr + sizeof(uint64_t), data_bytes);
46 // Increment our counters
47 ptr += data_bytes;
48 size -= data_bytes;
49 } else {
50 int err = errno;
51 GetErrorLogInstance() << "Error reading lead " << lead << " errno:" << err
52 << " " << ::strerror(err) << "\n";
53 return 0;
54 }
55 }
56 return (bufsize - size) / sizeof(uint64_t);
57 }
58
59 const bool PerfCounters::kSupported = true;
60
61 // Initializes libpfm only on the first call. Returns whether that single
62 // initialization was successful.
Initialize()63 bool PerfCounters::Initialize() {
64 // Function-scope static gets initialized only once on first call.
65 static const bool success = []() {
66 return pfm_initialize() == PFM_SUCCESS;
67 }();
68 return success;
69 }
70
IsCounterSupported(const std::string & name)71 bool PerfCounters::IsCounterSupported(const std::string& name) {
72 Initialize();
73 perf_event_attr_t attr;
74 std::memset(&attr, 0, sizeof(attr));
75 pfm_perf_encode_arg_t arg;
76 std::memset(&arg, 0, sizeof(arg));
77 arg.attr = &attr;
78 const int mode = PFM_PLM3; // user mode only
79 int ret = pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT_EXT,
80 &arg);
81 return (ret == PFM_SUCCESS);
82 }
83
Create(const std::vector<std::string> & counter_names)84 PerfCounters PerfCounters::Create(
85 const std::vector<std::string>& counter_names) {
86 if (!counter_names.empty()) {
87 Initialize();
88 }
89
90 // Valid counters will populate these arrays but we start empty
91 std::vector<std::string> valid_names;
92 std::vector<int> counter_ids;
93 std::vector<int> leader_ids;
94
95 // Resize to the maximum possible
96 valid_names.reserve(counter_names.size());
97 counter_ids.reserve(counter_names.size());
98
99 const int kCounterMode = PFM_PLM3; // user mode only
100
101 // Group leads will be assigned on demand. The idea is that once we cannot
102 // create a counter descriptor, the reason is that this group has maxed out
103 // so we set the group_id again to -1 and retry - giving the algorithm a
104 // chance to create a new group leader to hold the next set of counters.
105 int group_id = -1;
106
107 // Loop through all performance counters
108 for (size_t i = 0; i < counter_names.size(); ++i) {
109 // we are about to push into the valid names vector
110 // check if we did not reach the maximum
111 if (valid_names.size() == PerfCounterValues::kMaxCounters) {
112 // Log a message if we maxed out and stop adding
113 GetErrorLogInstance()
114 << counter_names.size() << " counters were requested. The maximum is "
115 << PerfCounterValues::kMaxCounters << " and " << valid_names.size()
116 << " were already added. All remaining counters will be ignored\n";
117 // stop the loop and return what we have already
118 break;
119 }
120
121 // Check if this name is empty
122 const auto& name = counter_names[i];
123 if (name.empty()) {
124 GetErrorLogInstance()
125 << "A performance counter name was the empty string\n";
126 continue;
127 }
128
129 // Here first means first in group, ie the group leader
130 const bool is_first = (group_id < 0);
131
132 // This struct will be populated by libpfm from the counter string
133 // and then fed into the syscall perf_event_open
134 struct perf_event_attr attr {};
135 attr.size = sizeof(attr);
136
137 // This is the input struct to libpfm.
138 pfm_perf_encode_arg_t arg{};
139 arg.attr = &attr;
140 const int pfm_get = pfm_get_os_event_encoding(name.c_str(), kCounterMode,
141 PFM_OS_PERF_EVENT, &arg);
142 if (pfm_get != PFM_SUCCESS) {
143 GetErrorLogInstance()
144 << "Unknown performance counter name: " << name << "\n";
145 continue;
146 }
147
148 // We then proceed to populate the remaining fields in our attribute struct
149 // Note: the man page for perf_event_create suggests inherit = true and
150 // read_format = PERF_FORMAT_GROUP don't work together, but that's not the
151 // case.
152 attr.disabled = is_first;
153 attr.inherit = true;
154 attr.pinned = is_first;
155 attr.exclude_kernel = true;
156 attr.exclude_user = false;
157 attr.exclude_hv = true;
158
159 // Read all counters in a group in one read.
160 attr.read_format = PERF_FORMAT_GROUP; //| PERF_FORMAT_TOTAL_TIME_ENABLED |
161 // PERF_FORMAT_TOTAL_TIME_RUNNING;
162
163 int id = -1;
164 while (id < 0) {
165 static constexpr size_t kNrOfSyscallRetries = 5;
166 // Retry syscall as it was interrupted often (b/64774091).
167 for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries;
168 ++num_retries) {
169 id = perf_event_open(&attr, 0, -1, group_id, 0);
170 if (id >= 0 || errno != EINTR) {
171 break;
172 }
173 }
174 if (id < 0) {
175 // If the file descriptor is negative we might have reached a limit
176 // in the current group. Set the group_id to -1 and retry
177 if (group_id >= 0) {
178 // Create a new group
179 group_id = -1;
180 } else {
181 // At this point we have already retried to set a new group id and
182 // failed. We then give up.
183 break;
184 }
185 }
186 }
187
188 // We failed to get a new file descriptor. We might have reached a hard
189 // hardware limit that cannot be resolved even with group multiplexing
190 if (id < 0) {
191 GetErrorLogInstance() << "***WARNING** Failed to get a file descriptor "
192 "for performance counter "
193 << name << ". Ignoring\n";
194
195 // We give up on this counter but try to keep going
196 // as the others would be fine
197 continue;
198 }
199 if (group_id < 0) {
200 // This is a leader, store and assign it to the current file descriptor
201 leader_ids.push_back(id);
202 group_id = id;
203 }
204 // This is a valid counter, add it to our descriptor's list
205 counter_ids.push_back(id);
206 valid_names.push_back(name);
207 }
208
209 // Loop through all group leaders activating them
210 // There is another option of starting ALL counters in a process but
211 // that would be far reaching an intrusion. If the user is using PMCs
212 // by themselves then this would have a side effect on them. It is
213 // friendlier to loop through all groups individually.
214 for (int lead : leader_ids) {
215 if (ioctl(lead, PERF_EVENT_IOC_ENABLE) != 0) {
216 // This should never happen but if it does, we give up on the
217 // entire batch as recovery would be a mess.
218 GetErrorLogInstance() << "***WARNING*** Failed to start counters. "
219 "Claring out all counters.\n";
220
221 // Close all performance counters
222 for (int id : counter_ids) {
223 ::close(id);
224 }
225
226 // Return an empty object so our internal state is still good and
227 // the process can continue normally without impact
228 return NoCounters();
229 }
230 }
231
232 return PerfCounters(std::move(valid_names), std::move(counter_ids),
233 std::move(leader_ids));
234 }
235
CloseCounters() const236 void PerfCounters::CloseCounters() const {
237 if (counter_ids_.empty()) {
238 return;
239 }
240 for (int lead : leader_ids_) {
241 ioctl(lead, PERF_EVENT_IOC_DISABLE);
242 }
243 for (int fd : counter_ids_) {
244 close(fd);
245 }
246 }
247 #else // defined HAVE_LIBPFM
Read(const std::vector<int> &)248 size_t PerfCounterValues::Read(const std::vector<int>&) { return 0; }
249
250 const bool PerfCounters::kSupported = false;
251
Initialize()252 bool PerfCounters::Initialize() { return false; }
253
IsCounterSupported(const std::string &)254 bool PerfCounters::IsCounterSupported(const std::string&) { return false; }
255
Create(const std::vector<std::string> & counter_names)256 PerfCounters PerfCounters::Create(
257 const std::vector<std::string>& counter_names) {
258 if (!counter_names.empty()) {
259 GetErrorLogInstance() << "Performance counters not supported.\n";
260 }
261 return NoCounters();
262 }
263
CloseCounters() const264 void PerfCounters::CloseCounters() const {}
265 #endif // defined HAVE_LIBPFM
266
PerfCountersMeasurement(const std::vector<std::string> & counter_names)267 PerfCountersMeasurement::PerfCountersMeasurement(
268 const std::vector<std::string>& counter_names)
269 : start_values_(counter_names.size()), end_values_(counter_names.size()) {
270 counters_ = PerfCounters::Create(counter_names);
271 }
272
operator =(PerfCounters && other)273 PerfCounters& PerfCounters::operator=(PerfCounters&& other) noexcept {
274 if (this != &other) {
275 CloseCounters();
276
277 counter_ids_ = std::move(other.counter_ids_);
278 leader_ids_ = std::move(other.leader_ids_);
279 counter_names_ = std::move(other.counter_names_);
280 }
281 return *this;
282 }
283 } // namespace internal
284 } // namespace benchmark
285