1 /*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "perfetto/ext/base/platform.h"
18 #include "perfetto/ext/base/watchdog.h"
19
20 #if PERFETTO_BUILDFLAG(PERFETTO_WATCHDOG)
21
22 #include <fcntl.h>
23 #include <poll.h>
24 #include <signal.h>
25 #include <stdint.h>
26 #include <stdlib.h>
27 #include <sys/syscall.h>
28 #include <sys/timerfd.h>
29 #include <unistd.h>
30
31 #include <algorithm>
32 #include <cinttypes>
33 #include <fstream>
34 #include <thread>
35
36 #include "perfetto/base/build_config.h"
37 #include "perfetto/base/logging.h"
38 #include "perfetto/base/thread_utils.h"
39 #include "perfetto/base/time.h"
40 #include "perfetto/ext/base/crash_keys.h"
41 #include "perfetto/ext/base/file_utils.h"
42 #include "perfetto/ext/base/scoped_file.h"
43 #include "perfetto/ext/base/utils.h"
44
45 namespace perfetto {
46 namespace base {
47
48 namespace {
49
50 constexpr uint32_t kDefaultPollingInterval = 30 * 1000;
51
52 base::CrashKey g_crash_key_reason("wdog_reason");
53
// Returns true iff |number| is a positive multiple of |divisor|
// (i.e. number == k * divisor for some k >= 1).
// A zero |divisor| never has multiples: returning false here avoids the
// undefined behaviour of evaluating |number % 0|.
bool IsMultipleOf(uint32_t number, uint32_t divisor) {
  if (divisor == 0)
    return false;
  return number >= divisor && number % divisor == 0;
}
57
// Returns the arithmetic mean of the first |size| elements of |array|.
// Returns 0 when |size| is zero (in which case |array| is not accessed).
double MeanForArray(const uint64_t array[], size_t size) {
  if (size == 0)
    return 0.0;

  uint64_t total = 0;
  for (size_t i = 0; i < size; i++) {
    total += array[i];
  }
  // Divide in floating point. An integer division would silently drop the
  // fractional part of the mean before the conversion to double.
  return static_cast<double>(total) / static_cast<double>(size);
}
65
66 } // namespace
67
ReadProcStat(int fd,ProcStat * out)68 bool ReadProcStat(int fd, ProcStat* out) {
69 char c[512];
70 size_t c_pos = 0;
71 while (c_pos < sizeof(c) - 1) {
72 ssize_t rd = PERFETTO_EINTR(read(fd, c + c_pos, sizeof(c) - c_pos));
73 if (rd < 0) {
74 PERFETTO_ELOG("Failed to read stat file to enforce resource limits.");
75 return false;
76 }
77 if (rd == 0)
78 break;
79 c_pos += static_cast<size_t>(rd);
80 }
81 PERFETTO_CHECK(c_pos < sizeof(c));
82 c[c_pos] = '\0';
83
84 if (sscanf(c,
85 "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %lu "
86 "%lu %*d %*d %*d %*d %*d %*d %*u %*u %ld",
87 &out->utime, &out->stime, &out->rss_pages) != 3) {
88 PERFETTO_ELOG("Invalid stat format: %s", c);
89 return false;
90 }
91 return true;
92 }
93
Watchdog(uint32_t polling_interval_ms)94 Watchdog::Watchdog(uint32_t polling_interval_ms)
95 : polling_interval_ms_(polling_interval_ms) {}
96
~Watchdog()97 Watchdog::~Watchdog() {
98 if (!thread_.joinable()) {
99 PERFETTO_DCHECK(!enabled_);
100 return;
101 }
102 PERFETTO_DCHECK(enabled_);
103 enabled_ = false;
104
105 // Rearm the timer to 1ns from now. This will cause the watchdog thread to
106 // wakeup from the poll() and see |enabled_| == false.
107 // This code path is used only in tests. In production code the watchdog is
108 // a singleton and is never destroyed.
109 struct itimerspec ts {};
110 ts.it_value.tv_sec = 0;
111 ts.it_value.tv_nsec = 1;
112 timerfd_settime(*timer_fd_, /*flags=*/0, &ts, nullptr);
113
114 thread_.join();
115 }
116
GetInstance()117 Watchdog* Watchdog::GetInstance() {
118 static Watchdog* watchdog = new Watchdog(kDefaultPollingInterval);
119 return watchdog;
120 }
121
122 // Can be called from any thread.
CreateFatalTimer(uint32_t ms,WatchdogCrashReason crash_reason)123 Watchdog::Timer Watchdog::CreateFatalTimer(uint32_t ms,
124 WatchdogCrashReason crash_reason) {
125 if (!enabled_.load(std::memory_order_relaxed))
126 return Watchdog::Timer(this, 0, crash_reason);
127
128 return Watchdog::Timer(this, ms, crash_reason);
129 }
130
131 // Can be called from any thread.
AddFatalTimer(TimerData timer)132 void Watchdog::AddFatalTimer(TimerData timer) {
133 std::lock_guard<std::mutex> guard(mutex_);
134 timers_.emplace_back(std::move(timer));
135 RearmTimerFd_Locked();
136 }
137
138 // Can be called from any thread.
RemoveFatalTimer(TimerData timer)139 void Watchdog::RemoveFatalTimer(TimerData timer) {
140 std::lock_guard<std::mutex> guard(mutex_);
141 for (auto it = timers_.begin(); it != timers_.end(); it++) {
142 if (*it == timer) {
143 timers_.erase(it);
144 break; // Remove only one. Doesn't matter which one.
145 }
146 }
147 RearmTimerFd_Locked();
148 }
149
RearmTimerFd_Locked()150 void Watchdog::RearmTimerFd_Locked() {
151 if (!enabled_)
152 return;
153 auto it = std::min_element(timers_.begin(), timers_.end());
154
155 // We use one timerfd to handle all the oustanding |timers_|. Keep it armed
156 // to the task expiring soonest.
157 struct itimerspec ts {};
158 if (it != timers_.end()) {
159 ts.it_value = ToPosixTimespec(it->deadline);
160 }
161 // If |timers_| is empty (it == end()) |ts.it_value| will remain
162 // zero-initialized and that will disarm the timer in the call below.
163 int res = timerfd_settime(*timer_fd_, TFD_TIMER_ABSTIME, &ts, nullptr);
164 PERFETTO_DCHECK(res == 0);
165 }
166
Start()167 void Watchdog::Start() {
168 std::lock_guard<std::mutex> guard(mutex_);
169 if (thread_.joinable()) {
170 PERFETTO_DCHECK(enabled_);
171 } else {
172 PERFETTO_DCHECK(!enabled_);
173
174 #if PERFETTO_BUILDFLAG(PERFETTO_OS_LINUX) || \
175 PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
176 // Kick the thread to start running but only on Android or Linux.
177 timer_fd_.reset(
178 timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK));
179 if (!timer_fd_) {
180 PERFETTO_PLOG(
181 "timerfd_create failed, the Perfetto watchdog is not available");
182 return;
183 }
184 enabled_ = true;
185 RearmTimerFd_Locked(); // Deal with timers created before Start().
186 thread_ = std::thread(&Watchdog::ThreadMain, this);
187 #endif
188 }
189 }
190
SetMemoryLimit(uint64_t bytes,uint32_t window_ms)191 void Watchdog::SetMemoryLimit(uint64_t bytes, uint32_t window_ms) {
192 // Update the fields under the lock.
193 std::lock_guard<std::mutex> guard(mutex_);
194
195 PERFETTO_CHECK(IsMultipleOf(window_ms, polling_interval_ms_) || bytes == 0);
196
197 size_t size = bytes == 0 ? 0 : window_ms / polling_interval_ms_ + 1;
198 memory_window_bytes_.Reset(size);
199 memory_limit_bytes_ = bytes;
200 }
201
SetCpuLimit(uint32_t percentage,uint32_t window_ms)202 void Watchdog::SetCpuLimit(uint32_t percentage, uint32_t window_ms) {
203 std::lock_guard<std::mutex> guard(mutex_);
204
205 PERFETTO_CHECK(percentage <= 100);
206 PERFETTO_CHECK(IsMultipleOf(window_ms, polling_interval_ms_) ||
207 percentage == 0);
208
209 size_t size = percentage == 0 ? 0 : window_ms / polling_interval_ms_ + 1;
210 cpu_window_time_ticks_.Reset(size);
211 cpu_limit_percentage_ = percentage;
212 }
213
ThreadMain()214 void Watchdog::ThreadMain() {
215 // Register crash keys explicitly to avoid running out of slots at crash time.
216 g_crash_key_reason.Register();
217
218 base::ScopedFile stat_fd(base::OpenFile("/proc/self/stat", O_RDONLY));
219 if (!stat_fd) {
220 PERFETTO_ELOG("Failed to open stat file to enforce resource limits.");
221 return;
222 }
223
224 PERFETTO_DCHECK(timer_fd_);
225
226 constexpr uint8_t kFdCount = 1;
227 struct pollfd fds[kFdCount]{};
228 fds[0].fd = *timer_fd_;
229 fds[0].events = POLLIN;
230
231 for (;;) {
232 // We use the poll() timeout to drive the periodic ticks for the cpu/memory
233 // checks. The only other case when the poll() unblocks is when we crash
234 // (or have to quit via enabled_ == false, but that happens only in tests).
235 platform::BeforeMaybeBlockingSyscall();
236 auto ret = poll(fds, kFdCount, static_cast<int>(polling_interval_ms_));
237 platform::AfterMaybeBlockingSyscall();
238 if (!enabled_)
239 return;
240 if (ret < 0) {
241 if (errno == ENOMEM || errno == EINTR) {
242 // Should happen extremely rarely.
243 std::this_thread::sleep_for(std::chrono::milliseconds(100));
244 continue;
245 }
246 PERFETTO_FATAL("watchdog poll() failed");
247 }
248
249 // If we get here either:
250 // 1. poll() timed out, in which case we should process cpu/mem guardrails.
251 // 2. A timer expired, in which case we shall crash.
252
253 uint64_t expired = 0; // Must be exactly 8 bytes.
254 auto res = PERFETTO_EINTR(read(*timer_fd_, &expired, sizeof(expired)));
255 PERFETTO_DCHECK((res < 0 && (errno == EAGAIN)) ||
256 (res == sizeof(expired) && expired > 0));
257 const auto now = GetWallTimeMs();
258
259 // Check if any of the timers expired.
260 int tid_to_kill = 0;
261 WatchdogCrashReason crash_reason{};
262 {
263 std::lock_guard<std::mutex> guard(mutex_);
264 for (const auto& timer : timers_) {
265 if (now >= timer.deadline) {
266 tid_to_kill = timer.thread_id;
267 crash_reason = timer.crash_reason;
268 break;
269 }
270 }
271 }
272
273 if (tid_to_kill)
274 SerializeLogsAndKillThread(tid_to_kill, crash_reason);
275
276 // Check CPU and memory guardrails (if enabled).
277 lseek(stat_fd.get(), 0, SEEK_SET);
278 ProcStat stat;
279 if (!ReadProcStat(stat_fd.get(), &stat))
280 continue;
281 uint64_t cpu_time = stat.utime + stat.stime;
282 uint64_t rss_bytes =
283 static_cast<uint64_t>(stat.rss_pages) * base::GetSysPageSize();
284
285 bool threshold_exceeded = false;
286 {
287 std::lock_guard<std::mutex> guard(mutex_);
288 if (CheckMemory_Locked(rss_bytes) && !IsSyncMemoryTaggingEnabled()) {
289 threshold_exceeded = true;
290 crash_reason = WatchdogCrashReason::kMemGuardrail;
291 } else if (CheckCpu_Locked(cpu_time)) {
292 threshold_exceeded = true;
293 crash_reason = WatchdogCrashReason::kCpuGuardrail;
294 }
295 }
296
297 if (threshold_exceeded)
298 SerializeLogsAndKillThread(getpid(), crash_reason);
299 }
300 }
301
SerializeLogsAndKillThread(int tid,WatchdogCrashReason crash_reason)302 void Watchdog::SerializeLogsAndKillThread(int tid,
303 WatchdogCrashReason crash_reason) {
304 g_crash_key_reason.Set(static_cast<int>(crash_reason));
305
306 // We are about to die. Serialize the logs into the crash buffer so the
307 // debuggerd crash handler picks them up and attaches to the bugreport.
308 // In the case of a PERFETTO_CHECK/PERFETTO_FATAL this is done in logging.h.
309 // But in the watchdog case, we don't hit that codepath and must do ourselves.
310 MaybeSerializeLastLogsForCrashReporting();
311
312 // Send a SIGABRT to the thread that armed the timer. This is to see the
313 // callstack of the thread that is stuck in a long task rather than the
314 // watchdog thread.
315 if (syscall(__NR_tgkill, getpid(), tid, SIGABRT) < 0) {
316 // At this point the process must die. If for any reason the tgkill doesn't
317 // work (e.g. the thread has disappeared), force a crash from here.
318 abort();
319 }
320
321 if (disable_kill_failsafe_for_testing_)
322 return;
323
324 // The tgkill() above will take some milliseconds to cause a crash, as it
325 // involves the kernel to queue the SIGABRT on the target thread (often the
326 // main thread, which is != watchdog thread) and do a scheduling round.
327 // If something goes wrong though (the target thread has signals masked or
328 // is stuck in an uninterruptible+wakekill syscall) force quit from this
329 // thread.
330 std::this_thread::sleep_for(std::chrono::seconds(10));
331 abort();
332 }
333
CheckMemory_Locked(uint64_t rss_bytes)334 bool Watchdog::CheckMemory_Locked(uint64_t rss_bytes) {
335 if (memory_limit_bytes_ == 0)
336 return false;
337
338 // Add the current stat value to the ring buffer and check that the mean
339 // remains under our threshold.
340 if (memory_window_bytes_.Push(rss_bytes)) {
341 if (memory_window_bytes_.Mean() >
342 static_cast<double>(memory_limit_bytes_)) {
343 PERFETTO_ELOG(
344 "Memory watchdog trigger. Memory window of %f bytes is above the "
345 "%" PRIu64 " bytes limit.",
346 memory_window_bytes_.Mean(), memory_limit_bytes_);
347 return true;
348 }
349 }
350 return false;
351 }
352
CheckCpu_Locked(uint64_t cpu_time)353 bool Watchdog::CheckCpu_Locked(uint64_t cpu_time) {
354 if (cpu_limit_percentage_ == 0)
355 return false;
356
357 // Add the cpu time to the ring buffer.
358 if (cpu_window_time_ticks_.Push(cpu_time)) {
359 // Compute the percentage over the whole window and check that it remains
360 // under the threshold.
361 uint64_t difference_ticks = cpu_window_time_ticks_.NewestWhenFull() -
362 cpu_window_time_ticks_.OldestWhenFull();
363 double window_interval_ticks =
364 (static_cast<double>(WindowTimeForRingBuffer(cpu_window_time_ticks_)) /
365 1000.0) *
366 static_cast<double>(sysconf(_SC_CLK_TCK));
367 double percentage = static_cast<double>(difference_ticks) /
368 static_cast<double>(window_interval_ticks) * 100;
369 if (percentage > cpu_limit_percentage_) {
370 PERFETTO_ELOG("CPU watchdog trigger. %f%% CPU use is above the %" PRIu32
371 "%% CPU limit.",
372 percentage, cpu_limit_percentage_);
373 return true;
374 }
375 }
376 return false;
377 }
378
WindowTimeForRingBuffer(const WindowedInterval & window)379 uint32_t Watchdog::WindowTimeForRingBuffer(const WindowedInterval& window) {
380 return static_cast<uint32_t>(window.size() - 1) * polling_interval_ms_;
381 }
382
Push(uint64_t sample)383 bool Watchdog::WindowedInterval::Push(uint64_t sample) {
384 // Add the sample to the current position in the ring buffer.
385 buffer_[position_] = sample;
386
387 // Update the position with next one circularily.
388 position_ = (position_ + 1) % size_;
389
390 // Set the filled flag the first time we wrap.
391 filled_ = filled_ || position_ == 0;
392 return filled_;
393 }
394
Mean() const395 double Watchdog::WindowedInterval::Mean() const {
396 return MeanForArray(buffer_.get(), size_);
397 }
398
Clear()399 void Watchdog::WindowedInterval::Clear() {
400 position_ = 0;
401 buffer_.reset(new uint64_t[size_]());
402 }
403
Reset(size_t new_size)404 void Watchdog::WindowedInterval::Reset(size_t new_size) {
405 position_ = 0;
406 size_ = new_size;
407 buffer_.reset(new_size == 0 ? nullptr : new uint64_t[new_size]());
408 }
409
Timer(Watchdog * watchdog,uint32_t ms,WatchdogCrashReason crash_reason)410 Watchdog::Timer::Timer(Watchdog* watchdog,
411 uint32_t ms,
412 WatchdogCrashReason crash_reason)
413 : watchdog_(watchdog) {
414 if (!ms)
415 return; // No-op timer created when the watchdog is disabled.
416 timer_data_.deadline = GetWallTimeMs() + std::chrono::milliseconds(ms);
417 timer_data_.thread_id = GetThreadId();
418 timer_data_.crash_reason = crash_reason;
419 PERFETTO_DCHECK(watchdog_);
420 watchdog_->AddFatalTimer(timer_data_);
421 }
422
~Timer()423 Watchdog::Timer::~Timer() {
424 if (timer_data_.deadline.count())
425 watchdog_->RemoveFatalTimer(timer_data_);
426 }
427
Timer(Timer && other)428 Watchdog::Timer::Timer(Timer&& other) noexcept {
429 watchdog_ = std::move(other.watchdog_);
430 other.watchdog_ = nullptr;
431 timer_data_ = std::move(other.timer_data_);
432 other.timer_data_ = TimerData();
433 }
434
435 } // namespace base
436 } // namespace perfetto
437
438 #endif // PERFETTO_BUILDFLAG(PERFETTO_WATCHDOG)
439