xref: /aosp_15_r20/external/perfetto/src/base/watchdog_posix.cc (revision 6dbdd20afdafa5e3ca9b8809fa73465d530080dc)
1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "perfetto/ext/base/platform.h"
18 #include "perfetto/ext/base/watchdog.h"
19 
20 #if PERFETTO_BUILDFLAG(PERFETTO_WATCHDOG)
21 
22 #include <fcntl.h>
23 #include <poll.h>
24 #include <signal.h>
25 #include <stdint.h>
26 #include <stdlib.h>
27 #include <sys/syscall.h>
28 #include <sys/timerfd.h>
29 #include <unistd.h>
30 
31 #include <algorithm>
32 #include <cinttypes>
33 #include <fstream>
34 #include <thread>
35 
36 #include "perfetto/base/build_config.h"
37 #include "perfetto/base/logging.h"
38 #include "perfetto/base/thread_utils.h"
39 #include "perfetto/base/time.h"
40 #include "perfetto/ext/base/crash_keys.h"
41 #include "perfetto/ext/base/file_utils.h"
42 #include "perfetto/ext/base/scoped_file.h"
43 #include "perfetto/ext/base/utils.h"
44 
45 namespace perfetto {
46 namespace base {
47 
48 namespace {
49 
50 constexpr uint32_t kDefaultPollingInterval = 30 * 1000;
51 
52 base::CrashKey g_crash_key_reason("wdog_reason");
53 
// Returns true iff |number| is a non-zero whole multiple of |divisor|
// (i.e. number == k * divisor for some k >= 1).
// A zero |divisor| is rejected up front: nothing is a multiple of zero and
// evaluating |number % 0| would be undefined behaviour.
bool IsMultipleOf(uint32_t number, uint32_t divisor) {
  if (divisor == 0)
    return false;
  return number >= divisor && number % divisor == 0;
}
57 
// Returns the arithmetic mean of the first |size| elements of |array|.
// An empty range yields 0 instead of dividing by zero.
double MeanForArray(const uint64_t array[], size_t size) {
  if (size == 0)
    return 0.0;

  uint64_t total = 0;
  for (size_t i = 0; i < size; i++) {
    total += array[i];
  }
  // Divide in floating point: an integer division here would silently drop
  // the fractional part of the mean before it is converted to double.
  return static_cast<double>(total) / static_cast<double>(size);
}
65 
66 }  //  namespace
67 
ReadProcStat(int fd,ProcStat * out)68 bool ReadProcStat(int fd, ProcStat* out) {
69   char c[512];
70   size_t c_pos = 0;
71   while (c_pos < sizeof(c) - 1) {
72     ssize_t rd = PERFETTO_EINTR(read(fd, c + c_pos, sizeof(c) - c_pos));
73     if (rd < 0) {
74       PERFETTO_ELOG("Failed to read stat file to enforce resource limits.");
75       return false;
76     }
77     if (rd == 0)
78       break;
79     c_pos += static_cast<size_t>(rd);
80   }
81   PERFETTO_CHECK(c_pos < sizeof(c));
82   c[c_pos] = '\0';
83 
84   if (sscanf(c,
85              "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %lu "
86              "%lu %*d %*d %*d %*d %*d %*d %*u %*u %ld",
87              &out->utime, &out->stime, &out->rss_pages) != 3) {
88     PERFETTO_ELOG("Invalid stat format: %s", c);
89     return false;
90   }
91   return true;
92 }
93 
Watchdog(uint32_t polling_interval_ms)94 Watchdog::Watchdog(uint32_t polling_interval_ms)
95     : polling_interval_ms_(polling_interval_ms) {}
96 
~Watchdog()97 Watchdog::~Watchdog() {
98   if (!thread_.joinable()) {
99     PERFETTO_DCHECK(!enabled_);
100     return;
101   }
102   PERFETTO_DCHECK(enabled_);
103   enabled_ = false;
104 
105   // Rearm the timer to 1ns from now. This will cause the watchdog thread to
106   // wakeup from the poll() and see |enabled_| == false.
107   // This code path is used only in tests. In production code the watchdog is
108   // a singleton and is never destroyed.
109   struct itimerspec ts {};
110   ts.it_value.tv_sec = 0;
111   ts.it_value.tv_nsec = 1;
112   timerfd_settime(*timer_fd_, /*flags=*/0, &ts, nullptr);
113 
114   thread_.join();
115 }
116 
GetInstance()117 Watchdog* Watchdog::GetInstance() {
118   static Watchdog* watchdog = new Watchdog(kDefaultPollingInterval);
119   return watchdog;
120 }
121 
122 // Can be called from any thread.
CreateFatalTimer(uint32_t ms,WatchdogCrashReason crash_reason)123 Watchdog::Timer Watchdog::CreateFatalTimer(uint32_t ms,
124                                            WatchdogCrashReason crash_reason) {
125   if (!enabled_.load(std::memory_order_relaxed))
126     return Watchdog::Timer(this, 0, crash_reason);
127 
128   return Watchdog::Timer(this, ms, crash_reason);
129 }
130 
131 // Can be called from any thread.
AddFatalTimer(TimerData timer)132 void Watchdog::AddFatalTimer(TimerData timer) {
133   std::lock_guard<std::mutex> guard(mutex_);
134   timers_.emplace_back(std::move(timer));
135   RearmTimerFd_Locked();
136 }
137 
138 // Can be called from any thread.
RemoveFatalTimer(TimerData timer)139 void Watchdog::RemoveFatalTimer(TimerData timer) {
140   std::lock_guard<std::mutex> guard(mutex_);
141   for (auto it = timers_.begin(); it != timers_.end(); it++) {
142     if (*it == timer) {
143       timers_.erase(it);
144       break;  // Remove only one. Doesn't matter which one.
145     }
146   }
147   RearmTimerFd_Locked();
148 }
149 
RearmTimerFd_Locked()150 void Watchdog::RearmTimerFd_Locked() {
151   if (!enabled_)
152     return;
153   auto it = std::min_element(timers_.begin(), timers_.end());
154 
155   // We use one timerfd to handle all the oustanding |timers_|. Keep it armed
156   // to the task expiring soonest.
157   struct itimerspec ts {};
158   if (it != timers_.end()) {
159     ts.it_value = ToPosixTimespec(it->deadline);
160   }
161   // If |timers_| is empty (it == end()) |ts.it_value| will remain
162   // zero-initialized and that will disarm the timer in the call below.
163   int res = timerfd_settime(*timer_fd_, TFD_TIMER_ABSTIME, &ts, nullptr);
164   PERFETTO_DCHECK(res == 0);
165 }
166 
Start()167 void Watchdog::Start() {
168   std::lock_guard<std::mutex> guard(mutex_);
169   if (thread_.joinable()) {
170     PERFETTO_DCHECK(enabled_);
171   } else {
172     PERFETTO_DCHECK(!enabled_);
173 
174 #if PERFETTO_BUILDFLAG(PERFETTO_OS_LINUX) || \
175     PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
176     // Kick the thread to start running but only on Android or Linux.
177     timer_fd_.reset(
178         timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK));
179     if (!timer_fd_) {
180       PERFETTO_PLOG(
181           "timerfd_create failed, the Perfetto watchdog is not available");
182       return;
183     }
184     enabled_ = true;
185     RearmTimerFd_Locked();  // Deal with timers created before Start().
186     thread_ = std::thread(&Watchdog::ThreadMain, this);
187 #endif
188   }
189 }
190 
SetMemoryLimit(uint64_t bytes,uint32_t window_ms)191 void Watchdog::SetMemoryLimit(uint64_t bytes, uint32_t window_ms) {
192   // Update the fields under the lock.
193   std::lock_guard<std::mutex> guard(mutex_);
194 
195   PERFETTO_CHECK(IsMultipleOf(window_ms, polling_interval_ms_) || bytes == 0);
196 
197   size_t size = bytes == 0 ? 0 : window_ms / polling_interval_ms_ + 1;
198   memory_window_bytes_.Reset(size);
199   memory_limit_bytes_ = bytes;
200 }
201 
SetCpuLimit(uint32_t percentage,uint32_t window_ms)202 void Watchdog::SetCpuLimit(uint32_t percentage, uint32_t window_ms) {
203   std::lock_guard<std::mutex> guard(mutex_);
204 
205   PERFETTO_CHECK(percentage <= 100);
206   PERFETTO_CHECK(IsMultipleOf(window_ms, polling_interval_ms_) ||
207                  percentage == 0);
208 
209   size_t size = percentage == 0 ? 0 : window_ms / polling_interval_ms_ + 1;
210   cpu_window_time_ticks_.Reset(size);
211   cpu_limit_percentage_ = percentage;
212 }
213 
ThreadMain()214 void Watchdog::ThreadMain() {
215   // Register crash keys explicitly to avoid running out of slots at crash time.
216   g_crash_key_reason.Register();
217 
218   base::ScopedFile stat_fd(base::OpenFile("/proc/self/stat", O_RDONLY));
219   if (!stat_fd) {
220     PERFETTO_ELOG("Failed to open stat file to enforce resource limits.");
221     return;
222   }
223 
224   PERFETTO_DCHECK(timer_fd_);
225 
226   constexpr uint8_t kFdCount = 1;
227   struct pollfd fds[kFdCount]{};
228   fds[0].fd = *timer_fd_;
229   fds[0].events = POLLIN;
230 
231   for (;;) {
232     // We use the poll() timeout to drive the periodic ticks for the cpu/memory
233     // checks. The only other case when the poll() unblocks is when we crash
234     // (or have to quit via enabled_ == false, but that happens only in tests).
235     platform::BeforeMaybeBlockingSyscall();
236     auto ret = poll(fds, kFdCount, static_cast<int>(polling_interval_ms_));
237     platform::AfterMaybeBlockingSyscall();
238     if (!enabled_)
239       return;
240     if (ret < 0) {
241       if (errno == ENOMEM || errno == EINTR) {
242         // Should happen extremely rarely.
243         std::this_thread::sleep_for(std::chrono::milliseconds(100));
244         continue;
245       }
246       PERFETTO_FATAL("watchdog poll() failed");
247     }
248 
249     // If we get here either:
250     // 1. poll() timed out, in which case we should process cpu/mem guardrails.
251     // 2. A timer expired, in which case we shall crash.
252 
253     uint64_t expired = 0;  // Must be exactly 8 bytes.
254     auto res = PERFETTO_EINTR(read(*timer_fd_, &expired, sizeof(expired)));
255     PERFETTO_DCHECK((res < 0 && (errno == EAGAIN)) ||
256                     (res == sizeof(expired) && expired > 0));
257     const auto now = GetWallTimeMs();
258 
259     // Check if any of the timers expired.
260     int tid_to_kill = 0;
261     WatchdogCrashReason crash_reason{};
262     {
263       std::lock_guard<std::mutex> guard(mutex_);
264       for (const auto& timer : timers_) {
265         if (now >= timer.deadline) {
266           tid_to_kill = timer.thread_id;
267           crash_reason = timer.crash_reason;
268           break;
269         }
270       }
271     }
272 
273     if (tid_to_kill)
274       SerializeLogsAndKillThread(tid_to_kill, crash_reason);
275 
276     // Check CPU and memory guardrails (if enabled).
277     lseek(stat_fd.get(), 0, SEEK_SET);
278     ProcStat stat;
279     if (!ReadProcStat(stat_fd.get(), &stat))
280       continue;
281     uint64_t cpu_time = stat.utime + stat.stime;
282     uint64_t rss_bytes =
283         static_cast<uint64_t>(stat.rss_pages) * base::GetSysPageSize();
284 
285     bool threshold_exceeded = false;
286     {
287       std::lock_guard<std::mutex> guard(mutex_);
288       if (CheckMemory_Locked(rss_bytes) && !IsSyncMemoryTaggingEnabled()) {
289         threshold_exceeded = true;
290         crash_reason = WatchdogCrashReason::kMemGuardrail;
291       } else if (CheckCpu_Locked(cpu_time)) {
292         threshold_exceeded = true;
293         crash_reason = WatchdogCrashReason::kCpuGuardrail;
294       }
295     }
296 
297     if (threshold_exceeded)
298       SerializeLogsAndKillThread(getpid(), crash_reason);
299   }
300 }
301 
SerializeLogsAndKillThread(int tid,WatchdogCrashReason crash_reason)302 void Watchdog::SerializeLogsAndKillThread(int tid,
303                                           WatchdogCrashReason crash_reason) {
304   g_crash_key_reason.Set(static_cast<int>(crash_reason));
305 
306   // We are about to die. Serialize the logs into the crash buffer so the
307   // debuggerd crash handler picks them up and attaches to the bugreport.
308   // In the case of a PERFETTO_CHECK/PERFETTO_FATAL this is done in logging.h.
309   // But in the watchdog case, we don't hit that codepath and must do ourselves.
310   MaybeSerializeLastLogsForCrashReporting();
311 
312   // Send a SIGABRT to the thread that armed the timer. This is to see the
313   // callstack of the thread that is stuck in a long task rather than the
314   // watchdog thread.
315   if (syscall(__NR_tgkill, getpid(), tid, SIGABRT) < 0) {
316     // At this point the process must die. If for any reason the tgkill doesn't
317     // work (e.g. the thread has disappeared), force a crash from here.
318     abort();
319   }
320 
321   if (disable_kill_failsafe_for_testing_)
322     return;
323 
324   // The tgkill() above will take some milliseconds to cause a crash, as it
325   // involves the kernel to queue the SIGABRT on the target thread (often the
326   // main thread, which is != watchdog thread) and do a scheduling round.
327   // If something goes wrong though (the target thread has signals masked or
328   // is stuck in an uninterruptible+wakekill syscall) force quit from this
329   // thread.
330   std::this_thread::sleep_for(std::chrono::seconds(10));
331   abort();
332 }
333 
CheckMemory_Locked(uint64_t rss_bytes)334 bool Watchdog::CheckMemory_Locked(uint64_t rss_bytes) {
335   if (memory_limit_bytes_ == 0)
336     return false;
337 
338   // Add the current stat value to the ring buffer and check that the mean
339   // remains under our threshold.
340   if (memory_window_bytes_.Push(rss_bytes)) {
341     if (memory_window_bytes_.Mean() >
342         static_cast<double>(memory_limit_bytes_)) {
343       PERFETTO_ELOG(
344           "Memory watchdog trigger. Memory window of %f bytes is above the "
345           "%" PRIu64 " bytes limit.",
346           memory_window_bytes_.Mean(), memory_limit_bytes_);
347       return true;
348     }
349   }
350   return false;
351 }
352 
CheckCpu_Locked(uint64_t cpu_time)353 bool Watchdog::CheckCpu_Locked(uint64_t cpu_time) {
354   if (cpu_limit_percentage_ == 0)
355     return false;
356 
357   // Add the cpu time to the ring buffer.
358   if (cpu_window_time_ticks_.Push(cpu_time)) {
359     // Compute the percentage over the whole window and check that it remains
360     // under the threshold.
361     uint64_t difference_ticks = cpu_window_time_ticks_.NewestWhenFull() -
362                                 cpu_window_time_ticks_.OldestWhenFull();
363     double window_interval_ticks =
364         (static_cast<double>(WindowTimeForRingBuffer(cpu_window_time_ticks_)) /
365          1000.0) *
366         static_cast<double>(sysconf(_SC_CLK_TCK));
367     double percentage = static_cast<double>(difference_ticks) /
368                         static_cast<double>(window_interval_ticks) * 100;
369     if (percentage > cpu_limit_percentage_) {
370       PERFETTO_ELOG("CPU watchdog trigger. %f%% CPU use is above the %" PRIu32
371                     "%% CPU limit.",
372                     percentage, cpu_limit_percentage_);
373       return true;
374     }
375   }
376   return false;
377 }
378 
WindowTimeForRingBuffer(const WindowedInterval & window)379 uint32_t Watchdog::WindowTimeForRingBuffer(const WindowedInterval& window) {
380   return static_cast<uint32_t>(window.size() - 1) * polling_interval_ms_;
381 }
382 
Push(uint64_t sample)383 bool Watchdog::WindowedInterval::Push(uint64_t sample) {
384   // Add the sample to the current position in the ring buffer.
385   buffer_[position_] = sample;
386 
387   // Update the position with next one circularily.
388   position_ = (position_ + 1) % size_;
389 
390   // Set the filled flag the first time we wrap.
391   filled_ = filled_ || position_ == 0;
392   return filled_;
393 }
394 
Mean() const395 double Watchdog::WindowedInterval::Mean() const {
396   return MeanForArray(buffer_.get(), size_);
397 }
398 
Clear()399 void Watchdog::WindowedInterval::Clear() {
400   position_ = 0;
401   buffer_.reset(new uint64_t[size_]());
402 }
403 
Reset(size_t new_size)404 void Watchdog::WindowedInterval::Reset(size_t new_size) {
405   position_ = 0;
406   size_ = new_size;
407   buffer_.reset(new_size == 0 ? nullptr : new uint64_t[new_size]());
408 }
409 
Timer(Watchdog * watchdog,uint32_t ms,WatchdogCrashReason crash_reason)410 Watchdog::Timer::Timer(Watchdog* watchdog,
411                        uint32_t ms,
412                        WatchdogCrashReason crash_reason)
413     : watchdog_(watchdog) {
414   if (!ms)
415     return;  // No-op timer created when the watchdog is disabled.
416   timer_data_.deadline = GetWallTimeMs() + std::chrono::milliseconds(ms);
417   timer_data_.thread_id = GetThreadId();
418   timer_data_.crash_reason = crash_reason;
419   PERFETTO_DCHECK(watchdog_);
420   watchdog_->AddFatalTimer(timer_data_);
421 }
422 
~Timer()423 Watchdog::Timer::~Timer() {
424   if (timer_data_.deadline.count())
425     watchdog_->RemoveFatalTimer(timer_data_);
426 }
427 
Timer(Timer && other)428 Watchdog::Timer::Timer(Timer&& other) noexcept {
429   watchdog_ = std::move(other.watchdog_);
430   other.watchdog_ = nullptr;
431   timer_data_ = std::move(other.timer_data_);
432   other.timer_data_ = TimerData();
433 }
434 
435 }  // namespace base
436 }  // namespace perfetto
437 
438 #endif  // PERFETTO_BUILDFLAG(PERFETTO_WATCHDOG)
439