1 /*
2  * Copyright (c) 2020, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "carwatchdogd"
18 #define DEBUG false  // STOPSHIP if true.
19 
20 #include "WatchdogPerfService.h"
21 
22 #include <android-base/file.h>
23 #include <android-base/parseint.h>
24 #include <android-base/stringprintf.h>
25 #include <android-base/strings.h>
26 #include <android/util/ProtoOutputStream.h>
27 #include <log/log.h>
28 #include <processgroup/sched_policy.h>
29 
30 #include <pthread.h>
31 
32 #include <iterator>
33 #include <vector>
34 
35 #include <packages/services/Car/service/proto/android/car/watchdog/carwatchdog_daemon_dump.proto.h>
36 #include <packages/services/Car/service/proto/android/car/watchdog/health_check_client_info.proto.h>
37 #include <packages/services/Car/service/proto/android/car/watchdog/performance_stats.proto.h>
38 
39 namespace android {
40 namespace automotive {
41 namespace watchdog {
42 
43 namespace {
44 
45 using ::aidl::android::automotive::watchdog::internal::ResourceStats;
46 using ::aidl::android::automotive::watchdog::internal::UserState;
47 using ::android::sp;
48 using ::android::String16;
49 using ::android::String8;
50 using ::android::base::EqualsIgnoreCase;
51 using ::android::base::Error;
52 using ::android::base::Join;
53 using ::android::base::ParseUint;
54 using ::android::base::Result;
55 using ::android::base::Split;
56 using ::android::base::StringAppendF;
57 using ::android::base::StringPrintf;
58 using ::android::base::WriteStringToFd;
59 using ::android::util::ProtoOutputStream;
60 
61 const int32_t kMaxCachedUnsentResourceStats = 10;
62 const std::chrono::nanoseconds kPrevUnsentResourceStatsDelayNs = 3s;
63 // Minimum required collection polling interval between subsequent collections.
64 const std::chrono::nanoseconds kMinEventInterval = 1s;
65 const std::chrono::seconds kDefaultSystemEventCollectionInterval = 1s;
66 const std::chrono::seconds kDefaultPeriodicCollectionInterval = 20s;
67 const std::chrono::seconds kDefaultPeriodicMonitorInterval = 5s;
68 const std::chrono::nanoseconds kCustomCollectionInterval = 10s;
69 const std::chrono::nanoseconds kCustomCollectionDuration = 30min;
70 
71 constexpr const char* kServiceName = "WatchdogPerfService";
72 static const std::string kDumpMajorDelimiter = std::string(100, '-') + "\n";  // NOLINT
73 constexpr const char* kHelpText =
74         "\n%s dump options:\n"
75         "%s: Starts custom performance data collection. Customize the collection behavior with "
76         "the following optional arguments:\n"
77         "\t%s <seconds>: Modifies the collection interval. Default behavior is to collect once "
78         "every %lld seconds.\n"
79         "\t%s <seconds>: Modifies the maximum collection duration. Default behavior is to collect "
80         "until %ld minutes before automatically stopping the custom collection and discarding "
81         "the collected data.\n"
82         "\t%s <package name>,<package name>,...: Comma-separated value containing package names. "
83         "When provided, the results are filtered only to the provided package names. Default "
84         "behavior is to list the results for the top N packages.\n"
85         "%s: Stops custom performance data collection and generates a dump of "
86         "the collection report.\n\n"
87         "When no options are specified, the car watchdog report contains the performance data "
88         "collected during boot-time and over the last few minutes before the report generation.\n";
89 
parseSecondsFlag(const char ** args,uint32_t numArgs,size_t pos)90 Result<std::chrono::seconds> parseSecondsFlag(const char** args, uint32_t numArgs, size_t pos) {
91     if (numArgs <= pos) {
92         return Error() << "Value not provided";
93     }
94     uint64_t value;
95     if (std::string strValue = std::string(args[pos]); !ParseUint(strValue, &value)) {
96         return Error() << "Invalid value " << strValue << ", must be an integer";
97     }
98     return std::chrono::seconds(value);
99 }
100 
toString(std::variant<EventType,SwitchMessage> what)101 constexpr const char* toString(std::variant<EventType, SwitchMessage> what) {
102     return std::visit(
103             [&](const auto& v) -> const char* {
104                 switch (static_cast<int>(v)) {
105                     case EventType::INIT:
106                         return "INIT";
107                     case EventType::TERMINATED:
108                         return "TERMINATED";
109                     case EventType::BOOT_TIME_COLLECTION:
110                         return "BOOT_TIME_COLLECTION";
111                     case EventType::PERIODIC_COLLECTION:
112                         return "PERIODIC_COLLECTION";
113                     case EventType::USER_SWITCH_COLLECTION:
114                         return "USER_SWITCH_COLLECTION";
115                     case EventType::WAKE_UP_COLLECTION:
116                         return "WAKE_UP_COLLECTION";
117                     case EventType::CUSTOM_COLLECTION:
118                         return "CUSTOM_COLLECTION";
119                     case EventType::PERIODIC_MONITOR:
120                         return "PERIODIC_MONITOR";
121                     case EventType::LAST_EVENT:
122                         return "LAST_EVENT";
123                     case SwitchMessage::END_BOOTTIME_COLLECTION:
124                         return "END_BOOTTIME_COLLECTION";
125                     case SwitchMessage::END_USER_SWITCH_COLLECTION:
126                         return "END_USER_SWITCH_COLLECTION";
127                     case SwitchMessage::END_WAKE_UP_COLLECTION:
128                         return "END_WAKE_UP_COLLECTION";
129                     case SwitchMessage::END_CUSTOM_COLLECTION:
130                         return "END_CUSTOM_COLLECTION";
131                     default:
132                         return "INVALID_EVENT_OR_SWITCH_MESSAGE";
133                 }
134             },
135             what);
136 }
137 
toProtoEventType(EventType eventType)138 constexpr int toProtoEventType(EventType eventType) {
139     switch (eventType) {
140         case EventType::INIT:
141             return PerformanceProfilerDump::INIT;
142         case EventType::TERMINATED:
143             return PerformanceProfilerDump::TERMINATED;
144         case EventType::BOOT_TIME_COLLECTION:
145             return PerformanceProfilerDump::BOOT_TIME_COLLECTION;
146         case EventType::PERIODIC_COLLECTION:
147             return PerformanceProfilerDump::PERIODIC_COLLECTION;
148         case EventType::USER_SWITCH_COLLECTION:
149             return PerformanceProfilerDump::USER_SWITCH_COLLECTION;
150         case EventType::WAKE_UP_COLLECTION:
151             return PerformanceProfilerDump::WAKE_UP_COLLECTION;
152         case EventType::CUSTOM_COLLECTION:
153             return PerformanceProfilerDump::CUSTOM_COLLECTION;
154         default:
155             return PerformanceProfilerDump::EVENT_TYPE_UNSPECIFIED;
156     }
157 }
158 
toString(SystemState systemState)159 constexpr const char* toString(SystemState systemState) {
160     switch (systemState) {
161         case SystemState::NORMAL_MODE:
162             return "NORMAL_MODE";
163         case SystemState::GARAGE_MODE:
164             return "GARAGE_MODE";
165         default:
166             return "UNKNOWN MODE";
167     }
168 }
169 
isEmpty(const ResourceStats & resourceStats)170 bool isEmpty(const ResourceStats& resourceStats) {
171     return !resourceStats.resourceUsageStats.has_value() &&
172             !resourceStats.resourceOveruseStats.has_value();
173 }
174 
175 }  // namespace
176 
toString() const177 std::string WatchdogPerfService::EventMetadata::toString() const {
178     std::string buffer;
179     const auto intervalInSecs =
180             std::chrono::duration_cast<std::chrono::seconds>(pollingIntervalNs).count();
181     StringAppendF(&buffer, "Event polling interval: %lld second%s\n", intervalInSecs,
182                   ((intervalInSecs > 1) ? "s" : ""));
183     if (!filterPackages.empty()) {
184         std::vector<std::string> packages(filterPackages.begin(), filterPackages.end());
185         StringAppendF(&buffer, "Filtered results to packages: %s\n", Join(packages, ", ").c_str());
186     }
187     return buffer;
188 }
189 
registerDataProcessor(sp<DataProcessorInterface> processor)190 Result<void> WatchdogPerfService::registerDataProcessor(sp<DataProcessorInterface> processor) {
191     if (processor == nullptr) {
192         return Error() << "Must provide a valid data processor";
193     }
194     if (const auto result = processor->init(); !result.ok()) {
195         return Error() << "Failed to initialize " << processor->name().c_str() << ": "
196                        << result.error().message();
197     }
198     Mutex::Autolock lock(mMutex);
199     mDataProcessors.push_back(processor);
200     if (DEBUG) {
201         ALOGD("Successfully registered %s to %s", processor->name().c_str(), kServiceName);
202     }
203     return {};
204 }
205 
start()206 Result<void> WatchdogPerfService::start() {
207     {
208         Mutex::Autolock lock(mMutex);
209         if (mCurrCollectionEvent != EventType::INIT || mCollectionThread.joinable()) {
210             return Error(INVALID_OPERATION) << "Cannot start " << kServiceName << " more than once";
211         }
212         if (mWatchdogServiceHelper == nullptr) {
213             return Error(INVALID_OPERATION) << "No watchdog service helper is registered";
214         }
215         std::chrono::nanoseconds systemEventCollectionInterval =
216                 std::chrono::duration_cast<std::chrono::nanoseconds>(
217                         std::chrono::seconds(sysprop::systemEventCollectionInterval().value_or(
218                                 kDefaultSystemEventCollectionInterval.count())));
219         std::chrono::nanoseconds periodicCollectionInterval =
220                 std::chrono::duration_cast<std::chrono::nanoseconds>(
221                         std::chrono::seconds(sysprop::periodicCollectionInterval().value_or(
222                                 kDefaultPeriodicCollectionInterval.count())));
223         std::chrono::nanoseconds periodicMonitorInterval =
224                 std::chrono::duration_cast<std::chrono::nanoseconds>(
225                         std::chrono::seconds(sysprop::periodicMonitorInterval().value_or(
226                                 kDefaultPeriodicMonitorInterval.count())));
227         mBoottimeCollection = {
228                 .eventType = EventType::BOOT_TIME_COLLECTION,
229                 .pollingIntervalNs = systemEventCollectionInterval,
230         };
231         mPeriodicCollection = {
232                 .eventType = EventType::PERIODIC_COLLECTION,
233                 .pollingIntervalNs = periodicCollectionInterval,
234         };
235         mUserSwitchCollection = {{
236                 .eventType = EventType::USER_SWITCH_COLLECTION,
237                 .pollingIntervalNs = systemEventCollectionInterval,
238         }};
239         mWakeUpCollection = {
240                 .eventType = EventType::WAKE_UP_COLLECTION,
241                 .pollingIntervalNs = systemEventCollectionInterval,
242         };
243         mPeriodicMonitor = {
244                 .eventType = EventType::PERIODIC_MONITOR,
245                 .pollingIntervalNs = periodicMonitorInterval,
246         };
247         if (mDataProcessors.empty()) {
248             ALOGE("Terminating %s: No data processor is registered", kServiceName);
249             mCurrCollectionEvent = EventType::TERMINATED;
250             return Error() << "No data processor is registered";
251         }
252         mUidStatsCollector->init();
253         mProcStatCollector->init();
254         mProcDiskStatsCollector->init();
255     }
256 
257     mCollectionThread = std::thread([&]() {
258         {
259             Mutex::Autolock lock(mMutex);
260             if (EventType expected = EventType::INIT; mCurrCollectionEvent != expected) {
261                 ALOGE("Skipping performance data collection as the current collection event "
262                       "%s != %s",
263                       toString(mCurrCollectionEvent), toString(expected));
264                 return;
265             }
266             notifySystemStartUpLocked();
267             mCurrCollectionEvent = EventType::BOOT_TIME_COLLECTION;
268             mBoottimeCollection.lastPollElapsedRealTimeNs = mHandlerLooper->now();
269             mHandlerLooper->setLooper(Looper::prepare(/*opts=*/0));
270             mHandlerLooper->sendMessage(sp<WatchdogPerfService>::fromExisting(this),
271                                         EventType::BOOT_TIME_COLLECTION);
272         }
273         if (set_sched_policy(0, SP_BACKGROUND) != 0) {
274             ALOGW("Failed to set background scheduling priority to %s thread", kServiceName);
275         }
276         if (int result = pthread_setname_np(pthread_self(), "WatchdogPerfSvc"); result != 0) {
277             ALOGE("Failed to set %s thread name: %d", kServiceName, result);
278         }
279         ALOGI("Starting %s performance data collection", toString(mCurrCollectionEvent));
280         bool isCollectionActive = true;
281         /*
282          * Loop until the collection is not active -- performance collection runs on this thread in
283          * a handler.
284          */
285         while (isCollectionActive) {
286             mHandlerLooper->pollAll(/*timeoutMillis=*/-1);
287             Mutex::Autolock lock(mMutex);
288             isCollectionActive = mCurrCollectionEvent != EventType::TERMINATED;
289         }
290     });
291     return {};
292 }
293 
terminate()294 void WatchdogPerfService::terminate() {
295     {
296         Mutex::Autolock lock(mMutex);
297         if (mCurrCollectionEvent == EventType::TERMINATED) {
298             ALOGE("%s was terminated already", kServiceName);
299             return;
300         }
301         ALOGE("Terminating %s as car watchdog is terminating", kServiceName);
302         if (mCurrCollectionEvent != EventType::INIT) {
303             /*
304              * Looper runs only after EventType::INIT has completed so remove looper messages
305              * and wake the looper only when the current collection has changed from INIT.
306              */
307             mHandlerLooper->removeMessages(sp<WatchdogPerfService>::fromExisting(this));
308             mHandlerLooper->wake();
309         }
310         for (const auto& processor : mDataProcessors) {
311             processor->terminate();
312         }
313         mCurrCollectionEvent = EventType::TERMINATED;
314         mUnsentResourceStats.clear();
315     }
316     if (mCollectionThread.joinable()) {
317         mCollectionThread.join();
318         if (DEBUG) {
319             ALOGD("%s collection thread terminated", kServiceName);
320         }
321     }
322 }
323 
setSystemState(SystemState systemState)324 void WatchdogPerfService::setSystemState(SystemState systemState) {
325     Mutex::Autolock lock(mMutex);
326     if (mSystemState != systemState) {
327         ALOGI("%s switching from %s to %s", kServiceName, toString(mSystemState),
328               toString(systemState));
329     }
330     mSystemState = systemState;
331 }
332 
onCarWatchdogServiceRegistered()333 void WatchdogPerfService::onCarWatchdogServiceRegistered() {
334     Mutex::Autolock lock(mMutex);
335     for (const auto& processor : mDataProcessors) {
336         processor->onCarWatchdogServiceRegistered();
337     }
338     if (mUnsentResourceStats.empty()) {
339         return;
340     }
341     mHandlerLooper->sendMessage(sp<WatchdogPerfService>::fromExisting(this),
342                                 TaskMessage::SEND_RESOURCE_STATS);
343 }
344 
onBootFinished()345 Result<void> WatchdogPerfService::onBootFinished() {
346     Mutex::Autolock lock(mMutex);
347 
348     if (mBootCompletedTimeEpochSeconds <= 0) {
349         mBootCompletedTimeEpochSeconds = std::chrono::system_clock::to_time_t(
350             std::chrono::system_clock::now());
351     }
352 
353     if (EventType expected = EventType::BOOT_TIME_COLLECTION; mCurrCollectionEvent != expected) {
354         /*
355          * This case happens when either the WatchdogPerfService has prematurely terminated before
356          * boot complete notification is received or multiple boot complete notifications are
357          * received. In either case don't return error as this will lead to runtime exception and
358          * cause system to boot loop.
359          */
360         ALOGE("Current performance data collection event %s != %s", toString(mCurrCollectionEvent),
361               toString(expected));
362         return {};
363     }
364 
365     mHandlerLooper->sendMessageAtTime(mHandlerLooper->now() + mPostSystemEventDurationNs.count(),
366                                       sp<WatchdogPerfService>::fromExisting(this),
367                                       SwitchMessage::END_BOOTTIME_COLLECTION);
368     if (DEBUG) {
369         ALOGD("Boot complete signal received.");
370     }
371     return {};
372 }
373 
onUserStateChange(userid_t userId,const UserState & userState)374 Result<void> WatchdogPerfService::onUserStateChange(userid_t userId, const UserState& userState) {
375     Mutex::Autolock lock(mMutex);
376     if (mCurrCollectionEvent == EventType::BOOT_TIME_COLLECTION ||
377         mCurrCollectionEvent == EventType::CUSTOM_COLLECTION) {
378         mUserSwitchCollection.from = mUserSwitchCollection.to;
379         mUserSwitchCollection.to = userId;
380         ALOGI("Current collection: %s. Ignoring user switch from userId = %d to userId = %d)",
381               toString(mCurrCollectionEvent), mUserSwitchCollection.from, mUserSwitchCollection.to);
382         // Ignoring the user switch events because the boot-time and custom collections take
383         // precedence over other collections.
384         if (mCurrCollectionEvent == EventType::CUSTOM_COLLECTION) {
385             ALOGW("Unable to start %s. Current performance data collection event: %s",
386                   toString(EventType::USER_SWITCH_COLLECTION), toString(mCurrCollectionEvent));
387         }
388         return {};
389     }
390     switch (static_cast<int>(userState)) {
391         case static_cast<int>(UserState::USER_STATE_SWITCHING):
392             // TODO(b/243984863): Handle multi-user switching scenario.
393             mUserSwitchCollection.from = mUserSwitchCollection.to;
394             mUserSwitchCollection.to = userId;
395             if (mCurrCollectionEvent != EventType::PERIODIC_COLLECTION &&
396                 mCurrCollectionEvent != EventType::USER_SWITCH_COLLECTION) {
397                 ALOGE("Unable to start %s. Current performance data collection event: %s",
398                       toString(EventType::USER_SWITCH_COLLECTION), toString(mCurrCollectionEvent));
399                 return {};
400             }
401             startUserSwitchCollection();
402             ALOGI("Switching to %s (userIds: from = %d, to = %d)", toString(mCurrCollectionEvent),
403                   mUserSwitchCollection.from, mUserSwitchCollection.to);
404             break;
405         case static_cast<int>(UserState::USER_STATE_UNLOCKING):
406             if (mCurrCollectionEvent != EventType::PERIODIC_COLLECTION) {
407                 if (mCurrCollectionEvent != EventType::USER_SWITCH_COLLECTION) {
408                     ALOGE("Unable to start %s. Current performance data collection event: %s",
409                           toString(EventType::USER_SWITCH_COLLECTION),
410                           toString(mCurrCollectionEvent));
411                 }
412                 return {};
413             }
414             if (mUserSwitchCollection.to != userId) {
415                 return {};
416             }
417             startUserSwitchCollection();
418             ALOGI("Switching to %s (userId: %d)", toString(mCurrCollectionEvent), userId);
419             break;
420         case static_cast<int>(UserState::USER_STATE_POST_UNLOCKED): {
421             if (mCurrCollectionEvent != EventType::USER_SWITCH_COLLECTION) {
422                 ALOGE("Ignoring USER_STATE_POST_UNLOCKED because no user switch collection in "
423                       "progress. Current performance data collection event: %s.",
424                       toString(mCurrCollectionEvent));
425                 return {};
426             }
427             if (mUserSwitchCollection.to != userId) {
428                 ALOGE("Ignoring USER_STATE_POST_UNLOCKED signal for user id: %d. "
429                       "Current user being switched to: %d",
430                       userId, mUserSwitchCollection.to);
431                 return {};
432             }
433             auto thiz = sp<WatchdogPerfService>::fromExisting(this);
434             mHandlerLooper->removeMessages(thiz, SwitchMessage::END_USER_SWITCH_COLLECTION);
435             nsecs_t endUserSwitchCollectionTime =
436                     mHandlerLooper->now() + mPostSystemEventDurationNs.count();
437             mHandlerLooper->sendMessageAtTime(endUserSwitchCollectionTime, thiz,
438                                               SwitchMessage::END_USER_SWITCH_COLLECTION);
439             break;
440         }
441         default:
442             ALOGE("Unsupported user state: %d", static_cast<int>(userState));
443             return {};
444     }
445     if (DEBUG) {
446         ALOGD("Handled user state change: userId = %d, userState = %d", userId,
447               static_cast<int>(userState));
448     }
449     return {};
450 }
451 
startUserSwitchCollection()452 Result<void> WatchdogPerfService::startUserSwitchCollection() {
453     auto thiz = sp<WatchdogPerfService>::fromExisting(this);
454     mHandlerLooper->removeMessages(thiz);
455     mUserSwitchCollection.lastPollElapsedRealTimeNs = mHandlerLooper->now();
456     // End |EventType::USER_SWITCH_COLLECTION| after a timeout because the user switch end
457     // signal won't be received within a few seconds when the switch is blocked due to a
458     // keyguard event. Otherwise, polling beyond a few seconds will lead to unnecessary data
459     // collection.
460     mHandlerLooper->sendMessageAtTime(mHandlerLooper->now() + mUserSwitchTimeoutNs.count(), thiz,
461                                       SwitchMessage::END_USER_SWITCH_COLLECTION);
462     mCurrCollectionEvent = EventType::USER_SWITCH_COLLECTION;
463     mHandlerLooper->sendMessage(thiz, EventType::USER_SWITCH_COLLECTION);
464     return {};
465 }
466 
onSuspendExit()467 Result<void> WatchdogPerfService::onSuspendExit() {
468     Mutex::Autolock lock(mMutex);
469     if (mCurrCollectionEvent == EventType::CUSTOM_COLLECTION) {
470         // Ignoring the suspend exit event because the custom collection takes
471         // precedence over other collections.
472         ALOGE("Unable to start %s. Current performance data collection event: %s",
473               toString(EventType::WAKE_UP_COLLECTION), toString(mCurrCollectionEvent));
474         return {};
475     }
476     if (mCurrCollectionEvent == EventType::WAKE_UP_COLLECTION) {
477         ALOGE("The current performance data collection event is already %s",
478               toString(EventType::WAKE_UP_COLLECTION));
479         return {};
480     }
481     notifySystemStartUpLocked();
482     auto thiz = sp<WatchdogPerfService>::fromExisting(this);
483     mHandlerLooper->removeMessages(thiz);
484     nsecs_t now = mHandlerLooper->now();
485     mWakeUpCollection.lastPollElapsedRealTimeNs = now;
486     mHandlerLooper->sendMessageAtTime(now + mWakeUpDurationNs.count(), thiz,
487                                       SwitchMessage::END_WAKE_UP_COLLECTION);
488     mCurrCollectionEvent = EventType::WAKE_UP_COLLECTION;
489     mHandlerLooper->sendMessage(thiz, EventType::WAKE_UP_COLLECTION);
490     ALOGI("Switching to %s", toString(mCurrCollectionEvent));
491     return {};
492 }
493 
onShutdownEnter()494 Result<void> WatchdogPerfService::onShutdownEnter() {
495     Mutex::Autolock lock(mMutex);
496     if (mCurrCollectionEvent == EventType::CUSTOM_COLLECTION) {
497         ALOGI("Unable to switch to %s during shutdown enter. Current performance data collection "
498               "event: %s",
499               toString(EventType::PERIODIC_COLLECTION), toString(mCurrCollectionEvent));
500         return {};
501     }
502     switchToPeriodicLocked(/*startNow=*/true);
503     return {};
504 }
505 
onCustomCollection(int fd,const char ** args,uint32_t numArgs)506 Result<void> WatchdogPerfService::onCustomCollection(int fd, const char** args, uint32_t numArgs) {
507     if (numArgs == 0) {
508         return Error(BAD_VALUE) << "No custom collection dump arguments";
509     }
510 
511     if (EqualsIgnoreCase(args[0], kStartCustomCollectionFlag)) {
512         if (numArgs > 7) {
513             return Error(BAD_VALUE) << "Number of arguments to start custom performance data "
514                                     << "collection cannot exceed 7";
515         }
516         std::chrono::nanoseconds interval = kCustomCollectionInterval;
517         std::chrono::nanoseconds maxDuration = kCustomCollectionDuration;
518         std::unordered_set<std::string> filterPackages;
519         for (uint32_t i = 1; i < numArgs; ++i) {
520             if (EqualsIgnoreCase(args[i], kIntervalFlag)) {
521                 const auto& result = parseSecondsFlag(args, numArgs, i + 1);
522                 if (!result.ok()) {
523                     return Error(BAD_VALUE)
524                             << "Failed to parse " << kIntervalFlag << ": " << result.error();
525                 }
526                 interval = std::chrono::duration_cast<std::chrono::nanoseconds>(*result);
527                 ++i;
528                 continue;
529             }
530             if (EqualsIgnoreCase(args[i], kMaxDurationFlag)) {
531                 const auto& result = parseSecondsFlag(args, numArgs, i + 1);
532                 if (!result.ok()) {
533                     return Error(BAD_VALUE)
534                             << "Failed to parse " << kMaxDurationFlag << ": " << result.error();
535                 }
536                 maxDuration = std::chrono::duration_cast<std::chrono::nanoseconds>(*result);
537                 ++i;
538                 continue;
539             }
540             if (EqualsIgnoreCase(args[i], kFilterPackagesFlag)) {
541                 if (numArgs < i + 1) {
542                     return Error(BAD_VALUE)
543                             << "Must provide value for '" << kFilterPackagesFlag << "' flag";
544                 }
545                 std::vector<std::string> packages = Split(std::string(args[i + 1]), ",");
546                 std::copy(packages.begin(), packages.end(),
547                           std::inserter(filterPackages, filterPackages.end()));
548                 ++i;
549                 continue;
550             }
551             return Error(BAD_VALUE) << "Unknown flag " << args[i]
552                                     << " provided to start custom performance data collection";
553         }
554         if (const auto& result = startCustomCollection(interval, maxDuration, filterPackages);
555             !result.ok()) {
556             return result;
557         }
558         return {};
559     }
560     if (EqualsIgnoreCase(args[0], kEndCustomCollectionFlag)) {
561         if (numArgs != 1) {
562             ALOGW("Number of arguments to stop custom performance data collection cannot exceed 1. "
563                   "Stopping the data collection.");
564             WriteStringToFd("Number of arguments to stop custom performance data collection "
565                             "cannot exceed 1. Stopping the data collection.",
566                             fd);
567         }
568         return endCustomCollection(fd);
569     }
570     return Error(BAD_VALUE) << "Custom perf collection dump arguments start neither with "
571                             << kStartCustomCollectionFlag << " nor with "
572                             << kEndCustomCollectionFlag << " flags";
573 }
574 
onDump(int fd) const575 Result<void> WatchdogPerfService::onDump(int fd) const {
576     Mutex::Autolock lock(mMutex);
577     if (mCurrCollectionEvent == EventType::TERMINATED) {
578         ALOGW("%s not active. Dumping cached data", kServiceName);
579         if (!WriteStringToFd(StringPrintf("%s not active. Dumping cached data.", kServiceName),
580                              fd)) {
581             return Error(FAILED_TRANSACTION) << "Failed to write " << kServiceName << " status";
582         }
583     }
584 
585     if (const auto& result = dumpCollectorsStatusLocked(fd); !result.ok()) {
586         return Error(FAILED_TRANSACTION) << result.error();
587     }
588 
589     std::stringstream kernelStartTimestamp;
590     if (mKernelStartTimeEpochSeconds != 0) {
591         kernelStartTimestamp << std::put_time(std::localtime(&mKernelStartTimeEpochSeconds),
592                                             "%c %Z");
593     } else {
594         kernelStartTimestamp << "Missing";
595     }
596 
597     std::stringstream bootCompletedTimestamp;
598     if (mBootCompletedTimeEpochSeconds != 0) {
599         bootCompletedTimestamp << std::put_time(std::localtime(&mBootCompletedTimeEpochSeconds),
600                                               "%c %Z");
601     } else {
602         bootCompletedTimestamp << "Missing";
603     }
604     if (!WriteStringToFd(StringPrintf("\n%s%s report:\n%sSystem information:\n%s\n"
605                                       "Kernel start time: <%s>\n"
606                                       "Boot completed time: <%s>\n",
607                                       kDumpMajorDelimiter.c_str(), kServiceName,
608                                       kDumpMajorDelimiter.c_str(),
609                                       std::string(33, '=').c_str(),
610                                       kernelStartTimestamp.str().c_str(),
611                                       bootCompletedTimestamp.str().c_str()),
612                          fd) ||
613         !WriteStringToFd(StringPrintf("\nBoot-time collection "
614                       "information:\n%s\n",
615                       std::string(33, '=').c_str()),
616                          fd) ||
617         !WriteStringToFd(mBoottimeCollection.toString(), fd) ||
618         !WriteStringToFd(StringPrintf("\nWake-up collection information:\n%s\n",
619                                       std::string(31, '=').c_str()),
620                          fd) ||
621         !WriteStringToFd(mWakeUpCollection.toString(), fd) ||
622         !WriteStringToFd(StringPrintf("\nUser-switch collection information:\n%s\n",
623                                       std::string(35, '=').c_str()),
624                          fd) ||
625         !WriteStringToFd(mUserSwitchCollection.toString(), fd) ||
626         !WriteStringToFd(StringPrintf("\nPeriodic collection information:\n%s\n",
627                                       std::string(32, '=').c_str()),
628                          fd) ||
629         !WriteStringToFd(mPeriodicCollection.toString(), fd)) {
630         return Error(FAILED_TRANSACTION)
631                 << "Failed to dump the boot-time and periodic collection reports.";
632     }
633 
634     for (const auto& processor : mDataProcessors) {
635         if (const auto result = processor->onDump(fd); !result.ok()) {
636             return result;
637         }
638     }
639 
640     WriteStringToFd(kDumpMajorDelimiter, fd);
641     return {};
642 }
643 
onDumpProto(ProtoOutputStream & outProto) const644 Result<void> WatchdogPerfService::onDumpProto(ProtoOutputStream& outProto) const {
645     Mutex::Autolock lock(mMutex);
646     if (mCurrCollectionEvent == EventType::TERMINATED) {
647         ALOGW("%s not active. Dumping cached data", kServiceName);
648     }
649 
650     uint64_t performanceProfilerDumpToken =
651             outProto.start(CarWatchdogDaemonDump::PERFORMANCE_PROFILER_DUMP);
652 
653     outProto.write(PerformanceProfilerDump::CURRENT_EVENT, toProtoEventType(mCurrCollectionEvent));
654     outProto.write(PerformanceProfilerDump::BOOT_COMPLETED_TIME_EPOCH_SECONDS,
655                    mBootCompletedTimeEpochSeconds);
656     outProto.write(PerformanceProfilerDump::KERNEL_START_TIME_EPOCH_SECONDS,
657                    mKernelStartTimeEpochSeconds);
658 
659     DataProcessorInterface::CollectionIntervals collectionIntervals =
660             {.mBoottimeIntervalMillis = std::chrono::duration_cast<std::chrono::milliseconds>(
661                      mBoottimeCollection.pollingIntervalNs),
662              .mPeriodicIntervalMillis = std::chrono::duration_cast<std::chrono::milliseconds>(
663                      mPeriodicCollection.pollingIntervalNs),
664              .mUserSwitchIntervalMillis = std::chrono::duration_cast<std::chrono::milliseconds>(
665                      mUserSwitchCollection.pollingIntervalNs),
666              .mWakeUpIntervalMillis = std::chrono::duration_cast<std::chrono::milliseconds>(
667                      mWakeUpCollection.pollingIntervalNs),
668              .mCustomIntervalMillis = std::chrono::duration_cast<std::chrono::milliseconds>(
669                      mCustomCollection.pollingIntervalNs)};
670 
671     // Populate Performance Stats
672     for (const auto& processor : mDataProcessors) {
673         processor->onDumpProto(collectionIntervals, outProto);
674     }
675 
676     outProto.end(performanceProfilerDumpToken);
677 
678     return {};
679 }
680 
dumpHelpText(int fd) const681 bool WatchdogPerfService::dumpHelpText(int fd) const {
682     return WriteStringToFd(StringPrintf(kHelpText, kServiceName, kStartCustomCollectionFlag,
683                                         kIntervalFlag,
684                                         std::chrono::duration_cast<std::chrono::seconds>(
685                                                 kCustomCollectionInterval)
686                                                 .count(),
687                                         kMaxDurationFlag,
688                                         std::chrono::duration_cast<std::chrono::minutes>(
689                                                 kCustomCollectionDuration)
690                                                 .count(),
691                                         kFilterPackagesFlag, kEndCustomCollectionFlag),
692                            fd);
693 }
694 
dumpCollectorsStatusLocked(int fd) const695 Result<void> WatchdogPerfService::dumpCollectorsStatusLocked(int fd) const {
696     if (!mUidStatsCollector->enabled() &&
697         !WriteStringToFd(StringPrintf("UidStatsCollector failed to access proc and I/O files"),
698                          fd)) {
699         return Error() << "Failed to write UidStatsCollector status";
700     }
701     if (!mProcStatCollector->enabled() &&
702         !WriteStringToFd(StringPrintf("ProcStat collector failed to access the file %s",
703                                       mProcStatCollector->filePath().c_str()),
704                          fd)) {
705         return Error() << "Failed to write ProcStat collector status";
706     }
707     return {};
708 }
709 
startCustomCollection(std::chrono::nanoseconds interval,std::chrono::nanoseconds maxDuration,const std::unordered_set<std::string> & filterPackages)710 Result<void> WatchdogPerfService::startCustomCollection(
711         std::chrono::nanoseconds interval, std::chrono::nanoseconds maxDuration,
712         const std::unordered_set<std::string>& filterPackages) {
713     if (interval < kMinEventInterval || maxDuration < kMinEventInterval) {
714         return Error(INVALID_OPERATION)
715                 << "Collection polling interval and maximum duration must be >= "
716                 << std::chrono::duration_cast<std::chrono::milliseconds>(kMinEventInterval).count()
717                 << " milliseconds";
718     }
719     Mutex::Autolock lock(mMutex);
720     if (mCurrCollectionEvent == EventType::CUSTOM_COLLECTION) {
721         return Error(INVALID_OPERATION) << "Cannot start custom collection more than once";
722     }
723     nsecs_t now = mHandlerLooper->now();
724     mCustomCollection = {
725             .eventType = EventType::CUSTOM_COLLECTION,
726             .pollingIntervalNs = interval,
727             .lastPollElapsedRealTimeNs = now,
728             .filterPackages = filterPackages,
729     };
730 
731     auto thiz = sp<WatchdogPerfService>::fromExisting(this);
732     mHandlerLooper->removeMessages(thiz);
733     mHandlerLooper->sendMessageAtTime(now + maxDuration.count(), thiz,
734                                       SwitchMessage::END_CUSTOM_COLLECTION);
735     mCurrCollectionEvent = EventType::CUSTOM_COLLECTION;
736     mHandlerLooper->sendMessage(thiz, EventType::CUSTOM_COLLECTION);
737     ALOGI("Starting %s performance data collection", toString(mCurrCollectionEvent));
738     return {};
739 }
740 
endCustomCollection(int fd)741 Result<void> WatchdogPerfService::endCustomCollection(int fd) {
742     Mutex::Autolock lock(mMutex);
743     if (mCurrCollectionEvent != EventType::CUSTOM_COLLECTION) {
744         return Error(INVALID_OPERATION) << "No custom collection is running";
745     }
746 
747     auto thiz = sp<WatchdogPerfService>::fromExisting(this);
748     mHandlerLooper->removeMessages(thiz);
749     mHandlerLooper->sendMessage(thiz, SwitchMessage::END_CUSTOM_COLLECTION);
750 
751     if (const auto result = dumpCollectorsStatusLocked(fd); !result.ok()) {
752         return Error(FAILED_TRANSACTION) << result.error();
753     }
754 
755     if (!WriteStringToFd(StringPrintf("%sPerformance data report for custom collection:\n%s",
756                                       kDumpMajorDelimiter.c_str(), kDumpMajorDelimiter.c_str()),
757                          fd) ||
758         !WriteStringToFd(mCustomCollection.toString(), fd)) {
759         return Error(FAILED_TRANSACTION) << "Failed to write custom collection report.";
760     }
761 
762     for (const auto& processor : mDataProcessors) {
763         if (const auto result = processor->onCustomCollectionDump(fd); !result.ok()) {
764             return Error(FAILED_TRANSACTION)
765                     << processor->name() << " failed on " << toString(mCurrCollectionEvent)
766                     << " collection: " << result.error();
767         }
768     }
769 
770     if (DEBUG) {
771         ALOGD("Custom event finished");
772     }
773     WriteStringToFd(kDumpMajorDelimiter, fd);
774     return {};
775 }
776 
switchToPeriodicLocked(bool startNow)777 void WatchdogPerfService::switchToPeriodicLocked(bool startNow) {
778     if (mCurrCollectionEvent == EventType::PERIODIC_COLLECTION) {
779         ALOGW("The current performance data collection event is already %s",
780               toString(mCurrCollectionEvent));
781         return;
782     }
783     auto thiz = sp<WatchdogPerfService>::fromExisting(this);
784     mHandlerLooper->removeMessages(thiz);
785     mCurrCollectionEvent = EventType::PERIODIC_COLLECTION;
786     mPeriodicCollection.lastPollElapsedRealTimeNs = mHandlerLooper->now();
787     if (startNow) {
788         mHandlerLooper->sendMessage(thiz, EventType::PERIODIC_COLLECTION);
789     } else {
790         mPeriodicCollection.lastPollElapsedRealTimeNs +=
791                 mPeriodicCollection.pollingIntervalNs.count();
792         mHandlerLooper->sendMessageAtTime(mPeriodicCollection.lastPollElapsedRealTimeNs, thiz,
793                                           EventType::PERIODIC_COLLECTION);
794     }
795     mPeriodicMonitor.lastPollElapsedRealTimeNs =
796             mHandlerLooper->now() + mPeriodicMonitor.pollingIntervalNs.count();
797     mHandlerLooper->sendMessageAtTime(mPeriodicMonitor.lastPollElapsedRealTimeNs, thiz,
798                                       EventType::PERIODIC_MONITOR);
799     ALOGI("Switching to %s and %s", toString(mCurrCollectionEvent),
800           toString(EventType::PERIODIC_MONITOR));
801 }
802 
/*
 * Dispatches a looper message based on |message.what|.
 *
 * Collection and monitor event messages run the matching process*Event method. END_* switch
 * messages finish the corresponding collection and switch to the periodic collection. Any
 * handler that yields a failed result marks the service as TERMINATED and removes the pending
 * messages for this handler.
 */
void WatchdogPerfService::handleMessage(const Message& message) {
    Result<void> result;

    switch (message.what) {
        case static_cast<int>(EventType::BOOT_TIME_COLLECTION):
            result = processCollectionEvent(&mBoottimeCollection);
            break;
        case static_cast<int>(SwitchMessage::END_BOOTTIME_COLLECTION):
            // Drop the pending messages, run one last boot-time collection and, only when it
            // succeeds, switch to the periodic collection (first poll one interval from now).
            mHandlerLooper->removeMessages(sp<WatchdogPerfService>::fromExisting(this));
            if (result = processCollectionEvent(&mBoottimeCollection); result.ok()) {
                Mutex::Autolock lock(mMutex);
                switchToPeriodicLocked(/*startNow=*/false);
            }
            break;
        case static_cast<int>(EventType::PERIODIC_COLLECTION):
            result = processCollectionEvent(&mPeriodicCollection);
            break;
        case static_cast<int>(EventType::USER_SWITCH_COLLECTION):
            result = processCollectionEvent(&mUserSwitchCollection);
            break;
        case static_cast<int>(EventType::WAKE_UP_COLLECTION):
            result = processCollectionEvent(&mWakeUpCollection);
            break;
        case static_cast<int>(SwitchMessage::END_USER_SWITCH_COLLECTION):
        case static_cast<int>(SwitchMessage::END_WAKE_UP_COLLECTION): {
            // Same flow as END_BOOTTIME_COLLECTION, using the metadata of whichever collection
            // is ending.
            mHandlerLooper->removeMessages(sp<WatchdogPerfService>::fromExisting(this));
            EventMetadata* eventMetadata =
                    message.what == static_cast<int>(SwitchMessage::END_USER_SWITCH_COLLECTION)
                    ? &mUserSwitchCollection
                    : &mWakeUpCollection;
            if (result = processCollectionEvent(eventMetadata); result.ok()) {
                Mutex::Autolock lock(mMutex);
                switchToPeriodicLocked(/*startNow=*/false);
            }
            break;
        }
        case static_cast<int>(EventType::CUSTOM_COLLECTION):
            result = processCollectionEvent(&mCustomCollection);
            break;
        case static_cast<int>(EventType::PERIODIC_MONITOR):
            result = processMonitorEvent(&mPeriodicMonitor);
            break;
        case static_cast<int>(SwitchMessage::END_CUSTOM_COLLECTION): {
            Mutex::Autolock lock(mMutex);
            // Ignore the message when the custom collection is no longer the current event.
            if (EventType expected = EventType::CUSTOM_COLLECTION;
                mCurrCollectionEvent != expected) {
                ALOGW("Skipping END_CUSTOM_COLLECTION message as the current collection %s != %s",
                      toString(mCurrCollectionEvent), toString(expected));
                return;
            }
            // Reset the custom collection metadata to its default state.
            mCustomCollection = {};
            for (const auto& processor : mDataProcessors) {
                /*
                 * Clear custom collection cache on the data processors when the custom collection
                 * ends.
                 */
                processor->onCustomCollectionDump(-1);
            }
            switchToPeriodicLocked(/*startNow=*/true);
            // Returns directly: |result| is not set on this path, so the failure handling below
            // has nothing to check.
            return;
        }
        case static_cast<int>(TaskMessage::SEND_RESOURCE_STATS):
            result = sendResourceStats();
            break;
        default:
            result = Error() << "Unknown message: " << message.what;
    }

    // Any failure above terminates the service: mark it TERMINATED and drop the pending messages.
    if (!result.ok()) {
        Mutex::Autolock lock(mMutex);
        ALOGE("Terminating %s: %s", kServiceName, result.error().message().c_str());
        /*
         * DO NOT CALL terminate() as it tries to join the collection thread but this code is
         * executed on the collection thread. Thus it will result in a deadlock.
         */
        mCurrCollectionEvent = EventType::TERMINATED;
        mHandlerLooper->removeMessages(sp<WatchdogPerfService>::fromExisting(this));
        mHandlerLooper->wake();
    }
}
883 
processCollectionEvent(WatchdogPerfService::EventMetadata * metadata)884 Result<void> WatchdogPerfService::processCollectionEvent(
885         WatchdogPerfService::EventMetadata* metadata) {
886     Mutex::Autolock lock(mMutex);
887     /*
888      * Messages sent to the looper are intrinsically racy such that a message from the previous
889      * collection event may land in the looper after the current collection has already begun. Thus
890      * verify the current collection event before starting the collection.
891      */
892     if (mCurrCollectionEvent != metadata->eventType) {
893         ALOGW("Skipping %s event on collection event %s", toString(metadata->eventType),
894               toString(mCurrCollectionEvent));
895         return {};
896     }
897     if (DEBUG) {
898         ALOGD("Processing %s collection event", toString(metadata->eventType));
899     }
900     if (metadata->pollingIntervalNs < kMinEventInterval) {
901         return Error()
902                 << "Collection polling interval of "
903                 << std::chrono::duration_cast<std::chrono::seconds>(metadata->pollingIntervalNs)
904                            .count()
905                 << " seconds for " << toString(metadata->eventType)
906                 << " collection cannot be less than "
907                 << std::chrono::duration_cast<std::chrono::seconds>(kMinEventInterval).count()
908                 << " seconds";
909     }
910     if (const auto result = collectLocked(metadata); !result.ok()) {
911         return Error() << toString(metadata->eventType) << " collection failed: " << result.error();
912     }
913     metadata->lastPollElapsedRealTimeNs += metadata->pollingIntervalNs.count();
914     mHandlerLooper->sendMessageAtTime(metadata->lastPollElapsedRealTimeNs,
915                                       sp<WatchdogPerfService>::fromExisting(this),
916                                       metadata->eventType);
917     return {};
918 }
919 
/*
 * Collects stats from the enabled collectors and forwards them to every data processor through
 * the callback that matches the current collection event.
 *
 * Resource stats produced by the processors are cached and, when the watchdog service is
 * connected, a SEND_RESOURCE_STATS message is queued to push them. Returns an error when no
 * collector is enabled, when a collector fails, or when a data processor fails.
 */
Result<void> WatchdogPerfService::collectLocked(WatchdogPerfService::EventMetadata* metadata) {
    if (!mUidStatsCollector->enabled() && !mProcStatCollector->enabled()) {
        return Error() << "No collectors enabled";
    }

    // Wall-clock time (millisecond precision) passed to the processors, and the elapsed time
    // since boot used to compute the duration covered by this collection.
    auto now = std::chrono::time_point_cast<std::chrono::milliseconds>(
            std::chrono::system_clock::now());
    int64_t timeSinceBootMillis = kGetElapsedTimeSinceBootMillisFunc();

    if (mUidStatsCollector->enabled()) {
        if (const auto result = mUidStatsCollector->collect(); !result.ok()) {
            return Error() << "Failed to collect per-uid proc and I/O stats: " << result.error();
        }
    }

    if (mProcStatCollector->enabled()) {
        if (const auto result = mProcStatCollector->collect(); !result.ok()) {
            return Error() << "Failed to collect proc stats: " << result.error();
        }

        // Fetch the kernel start time only while no positive value has been stored yet.
        if (mKernelStartTimeEpochSeconds <= 0) {
            mKernelStartTimeEpochSeconds =
                mProcStatCollector->getKernelStartTimeEpochSeconds();
        }
    }

    // Filled in by the processors whose callbacks receive a pointer to it.
    ResourceStats resourceStats = {};

    for (const auto& processor : mDataProcessors) {
        Result<void> result;
        switch (mCurrCollectionEvent) {
            case EventType::BOOT_TIME_COLLECTION:
                result = processor->onBoottimeCollection(now, mUidStatsCollector,
                                                         mProcStatCollector, &resourceStats);
                break;
            case EventType::PERIODIC_COLLECTION:
                result = processor->onPeriodicCollection(now, mSystemState, mUidStatsCollector,
                                                         mProcStatCollector, &resourceStats);
                break;
            case EventType::USER_SWITCH_COLLECTION: {
                // NOTE(review): the downcast assumes |metadata| points at a
                // UserSwitchEventMetadata whenever the current event is USER_SWITCH_COLLECTION.
                WatchdogPerfService::UserSwitchEventMetadata* userSwitchMetadata =
                        static_cast<WatchdogPerfService::UserSwitchEventMetadata*>(metadata);
                result = processor->onUserSwitchCollection(now, userSwitchMetadata->from,
                                                           userSwitchMetadata->to,
                                                           mUidStatsCollector, mProcStatCollector);
                break;
            }
            case EventType::WAKE_UP_COLLECTION:
                result = processor->onWakeUpCollection(now, mUidStatsCollector, mProcStatCollector);
                break;
            case EventType::CUSTOM_COLLECTION:
                result = processor->onCustomCollection(now, mSystemState, metadata->filterPackages,
                                                       mUidStatsCollector, mProcStatCollector,
                                                       &resourceStats);
                break;
            default:
                result = Error() << "Invalid collection event " << toString(mCurrCollectionEvent);
        }
        if (!result.ok()) {
            return Error() << processor->name() << " failed on " << toString(mCurrCollectionEvent)
                           << " collection: " << result.error();
        }
    }

    if (!isEmpty(resourceStats)) {
        if (resourceStats.resourceUsageStats.has_value()) {
            // The usage stats cover the time elapsed since the previous collection.
            resourceStats.resourceUsageStats->durationInMillis =
                    timeSinceBootMillis - mLastCollectionTimeMillis;
        }
        cacheUnsentResourceStatsLocked(std::move(resourceStats));
    }

    mLastCollectionTimeMillis = timeSinceBootMillis;

    // Nothing to push, or nobody to push to: leave the stats in the cache.
    if (mUnsentResourceStats.empty() || !mWatchdogServiceHelper->isServiceConnected()) {
        if (DEBUG && !mWatchdogServiceHelper->isServiceConnected()) {
            ALOGD("Cannot send resource stats since CarWatchdogService not connected.");
        }
        return {};
    }

    // Send message to send resource stats
    mHandlerLooper->sendMessage(sp<WatchdogPerfService>::fromExisting(this),
                                TaskMessage::SEND_RESOURCE_STATS);

    return {};
}
1007 
processMonitorEvent(WatchdogPerfService::EventMetadata * metadata)1008 Result<void> WatchdogPerfService::processMonitorEvent(
1009         WatchdogPerfService::EventMetadata* metadata) {
1010     if (metadata->eventType != static_cast<int>(EventType::PERIODIC_MONITOR)) {
1011         return Error() << "Invalid monitor event " << toString(metadata->eventType);
1012     }
1013     if (DEBUG) {
1014         ALOGD("Processing %s monitor event", toString(metadata->eventType));
1015     }
1016     if (metadata->pollingIntervalNs < kMinEventInterval) {
1017         return Error()
1018                 << "Monitor polling interval of "
1019                 << std::chrono::duration_cast<std::chrono::seconds>(metadata->pollingIntervalNs)
1020                            .count()
1021                 << " seconds for " << toString(metadata->eventType) << " event cannot be less than "
1022                 << std::chrono::duration_cast<std::chrono::seconds>(kMinEventInterval).count()
1023                 << " seconds";
1024     }
1025     Mutex::Autolock lock(mMutex);
1026     if (!mProcDiskStatsCollector->enabled()) {
1027         return Error() << "Cannot access proc disk stats for monitoring";
1028     }
1029     time_t now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
1030     if (const auto result = mProcDiskStatsCollector->collect(); !result.ok()) {
1031         return Error() << "Failed to collect disk stats: " << result.error();
1032     }
1033     auto* currCollectionMetadata = getCurrentCollectionMetadataLocked();
1034     if (currCollectionMetadata == nullptr) {
1035         return Error() << "No metadata available for current collection event: "
1036                        << toString(mCurrCollectionEvent);
1037     }
1038     bool requestedCollection = false;
1039     auto thiz = sp<WatchdogPerfService>::fromExisting(this);
1040     const auto requestCollection = [&]() mutable {
1041         if (requestedCollection) {
1042             return;
1043         }
1044         const nsecs_t prevLastPollElapsedRealTimeNs =
1045                 currCollectionMetadata->lastPollElapsedRealTimeNs -
1046                 currCollectionMetadata->pollingIntervalNs.count();
1047         nsecs_t lastPollElapsedRealTimeNs = mHandlerLooper->now();
1048         if (const auto delta = std::abs(lastPollElapsedRealTimeNs - prevLastPollElapsedRealTimeNs);
1049             delta < kMinEventInterval.count()) {
1050             return;
1051         }
1052         currCollectionMetadata->lastPollElapsedRealTimeNs = lastPollElapsedRealTimeNs;
1053         mHandlerLooper->removeMessages(thiz, currCollectionMetadata->eventType);
1054         mHandlerLooper->sendMessage(thiz, currCollectionMetadata->eventType);
1055         requestedCollection = true;
1056     };
1057     for (const auto& processor : mDataProcessors) {
1058         if (const auto result =
1059                     processor->onPeriodicMonitor(now, mProcDiskStatsCollector, requestCollection);
1060             !result.ok()) {
1061             return Error() << processor->name() << " failed on " << toString(metadata->eventType)
1062                            << ": " << result.error();
1063         }
1064     }
1065     metadata->lastPollElapsedRealTimeNs += metadata->pollingIntervalNs.count();
1066     if (metadata->lastPollElapsedRealTimeNs == currCollectionMetadata->lastPollElapsedRealTimeNs) {
1067         /*
1068          * If the |PERIODIC_MONITOR| and  *_COLLECTION events overlap, skip the |PERIODIC_MONITOR|
1069          * event.
1070          */
1071         metadata->lastPollElapsedRealTimeNs += metadata->pollingIntervalNs.count();
1072     }
1073     mHandlerLooper->sendMessageAtTime(metadata->lastPollElapsedRealTimeNs, thiz,
1074                                       metadata->eventType);
1075     return {};
1076 }
1077 
sendResourceStats()1078 Result<void> WatchdogPerfService::sendResourceStats() {
1079     std::vector<ResourceStats> unsentResourceStats = {};
1080     {
1081         Mutex::Autolock lock(mMutex);
1082         nsecs_t now = mHandlerLooper->now();
1083         for (auto it = mUnsentResourceStats.begin(); it != mUnsentResourceStats.end();) {
1084             if (now - std::get<nsecs_t>(*it) >= kPrevUnsentResourceStatsMaxDurationNs.count()) {
1085                 // Drop the expired stats
1086                 it = mUnsentResourceStats.erase(it);
1087                 continue;
1088             }
1089             unsentResourceStats.push_back(std::get<ResourceStats>(*it));
1090             ++it;
1091         }
1092     }
1093     if (unsentResourceStats.empty()) {
1094         return {};
1095     }
1096     if (auto status = mWatchdogServiceHelper->onLatestResourceStats(unsentResourceStats);
1097         !status.isOk()) {
1098         ALOGW("Failed to push the unsent resource stats to watchdog service: %s",
1099               status.getDescription().c_str());
1100         return {};
1101     }
1102     Mutex::Autolock lock(mMutex);
1103     mUnsentResourceStats.clear();
1104     if (DEBUG) {
1105         ALOGD("Pushed latest resource usage and I/O overuse stats to watchdog service");
1106     }
1107     return {};
1108 }
1109 
notifySystemStartUpLocked()1110 Result<void> WatchdogPerfService::notifySystemStartUpLocked() {
1111     for (const auto& processor : mDataProcessors) {
1112         if (const auto result = processor->onSystemStartup(); !result.ok()) {
1113             ALOGE("%s failed to process system startup event", processor->name().c_str());
1114             return Error() << processor->name() << " failed to process system startup event";
1115         }
1116     }
1117     return {};
1118 }
1119 
cacheUnsentResourceStatsLocked(ResourceStats resourceStats)1120 void WatchdogPerfService::cacheUnsentResourceStatsLocked(ResourceStats resourceStats) {
1121     mUnsentResourceStats.push_back(
1122             std::make_tuple(mHandlerLooper->now(), std::move(resourceStats)));
1123     if (mUnsentResourceStats.size() > kMaxCachedUnsentResourceStats) {
1124         mUnsentResourceStats.erase(mUnsentResourceStats.begin());
1125     }
1126 }
1127 
getCurrentCollectionMetadataLocked()1128 WatchdogPerfService::EventMetadata* WatchdogPerfService::getCurrentCollectionMetadataLocked() {
1129     switch (mCurrCollectionEvent) {
1130         case EventType::BOOT_TIME_COLLECTION:
1131             return &mBoottimeCollection;
1132         case EventType::PERIODIC_COLLECTION:
1133             return &mPeriodicCollection;
1134         case EventType::USER_SWITCH_COLLECTION:
1135             return &mUserSwitchCollection;
1136         case EventType::WAKE_UP_COLLECTION:
1137             return &mWakeUpCollection;
1138         case EventType::CUSTOM_COLLECTION:
1139             return &mCustomCollection;
1140         default:
1141             return nullptr;
1142     }
1143 }
1144 
1145 }  // namespace watchdog
1146 }  // namespace automotive
1147 }  // namespace android
1148