/** * Copyright (c) 2020, The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #pragma once #include "PressureMonitor.h" #include "ProcDiskStatsCollector.h" #include "ProcStatCollector.h" #include "UidStatsCollector.h" #include "WatchdogPerfService.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace android { namespace automotive { namespace watchdog { // Number of periodic collection records to cache in memory. constexpr int32_t kDefaultPeriodicCollectionBufferSize = 180; constexpr const char kEmptyCollectionMessage[] = "No collection recorded\n"; // Forward declaration for testing use only. namespace internal { class PerformanceProfilerPeer; } // namespace internal // Below classes, structs and enums should be used only by the implementation and unit tests. enum ProcStatType { IO_BLOCKED_TASKS_COUNT = 0, MAJOR_FAULTS, CPU_TIME, MEMORY_STATS, PROC_STAT_TYPES, }; // UserPackageStats represents the user package performance stats. class UserPackageStats { public: struct UidIoSingleOpStats { int64_t bytes[UID_STATES] = {0}; int64_t fsync[UID_STATES] = {0}; int64_t totalBytes() const { return std::numeric_limits::max() - bytes[UidState::FOREGROUND] > bytes[UidState::BACKGROUND] ? bytes[UidState::FOREGROUND] + bytes[UidState::BACKGROUND] : std::numeric_limits::max(); } }; struct UidSingleStats { uint64_t value = 0; struct ProcessSingleStats { std::string comm = ""; uint64_t value = 0; }; std::vector topNProcesses = {}; }; struct UidCpuStats { int64_t cpuTimeMillis = 0; int64_t cpuCycles = 0; struct ProcessCpuStats { int32_t pid = -1; std::string comm = ""; int64_t cpuTimeMillis = 0; int64_t cpuCycles = 0; }; std::vector topNProcesses = {}; }; struct MemoryStats { uint64_t rssKb = 0; uint64_t pssKb = 0; uint64_t ussKb = 0; uint64_t swapPssKb = 0; }; struct UidMemoryStats { MemoryStats memoryStats; bool isSmapsRollupSupported; struct ProcessMemoryStats { std::string comm = ""; MemoryStats memoryStats; }; std::vector topNProcesses = {}; }; UserPackageStats(MetricType metricType, const UidStats& uidStats); UserPackageStats(ProcStatType procStatType, const UidStats& uidStats, int topNProcessCount, bool isSmapsRollupSupported); // Class must be DefaultInsertable for std::vector::resize to work UserPackageStats() : uid(0), genericPackageName("") {} // For unit test case only UserPackageStats(uid_t uid, std::string genericPackageName, std::variant statsVariant) : uid(uid), genericPackageName(std::move(genericPackageName)), statsVariant(std::move(statsVariant)) {} // Returns the primary value of the current UidStats. If the variant has value // |std::monostate|, returns 0. // // This value should be used to sort the UidStats. uint64_t getValue() const; std::string toString(MetricType metricsType, const int64_t totalIoStats[][UID_STATES]) const; std::string toString(int64_t totalValue) const; std::string toString(int64_t totalRssKb, int64_t totalPssKb) const; uid_t uid; std::string genericPackageName; std::variant statsVariant; private: void cacheTopNProcessSingleStats( ProcStatType procStatType, const UidStats& uidStats, int topNProcessCount, std::vector* topNProcesses); void cacheTopNProcessCpuStats( const UidStats& uidStats, int topNProcessCount, std::vector* topNProcesses); void cacheTopNProcessMemStats( const UidStats& uidStats, int topNProcessCount, bool isSmapsRollupSupported, std::vector* topNProcesses); }; /** * User package summary performance stats collected from the `/proc/uid_io/stats`, * `/proc/[pid]/stat`, `/proc/[pid]/task/[tid]/stat`, and /proc/[pid]/status` files. */ struct UserPackageSummaryStats { std::vector topNCpuTimes = {}; std::vector topNIoReads = {}; std::vector topNIoWrites = {}; std::vector topNIoBlocked = {}; std::vector topNMajorFaults = {}; std::vector topNMemStats = {}; int64_t totalIoStats[METRIC_TYPES][UID_STATES] = {{0}}; std::unordered_map taskCountByUid = {}; // TODO(b/337115923): Clean up below duplicate fields and report `totalMajorFaults`, // `totalRssKb`, `totalPssKb`, and `majorFaultsPercentChange` as part of `SystemSummaryStats`. int64_t totalCpuTimeMillis = 0; uint64_t totalCpuCycles = 0; uint64_t totalMajorFaults = 0; uint64_t totalRssKb = 0; uint64_t totalPssKb = 0; // Percentage of increase/decrease in the major page faults since last collection. double majorFaultsPercentChange = 0.0; std::string toString() const; }; // TODO(b/268402964): Calculate the total CPU cycles using the per-UID BPF tool. // System performance stats collected from the `/proc/stat` file. struct SystemSummaryStats { int64_t cpuIoWaitTimeMillis = 0; int64_t cpuIdleTimeMillis = 0; int64_t totalCpuTimeMillis = 0; uint64_t totalCpuCycles = 0; uint64_t contextSwitchesCount = 0; uint32_t ioBlockedProcessCount = 0; uint32_t totalProcessCount = 0; std::string toString() const; }; // Performance record collected during a sampling/collection period. struct PerfStatsRecord { time_point_millis collectionTimeMillis; SystemSummaryStats systemSummaryStats; UserPackageSummaryStats userPackageSummaryStats; std::unordered_map memoryPressureLevelDurations; std::string toString() const; }; // Group of performance records collected for a collection event. struct CollectionInfo { size_t maxCacheSize = 0; // Maximum cache size for the collection. std::vector records; // Cache of collected performance records. std::string toString() const; }; // Group of performance records collected for a user switch collection event. struct UserSwitchCollectionInfo : CollectionInfo { userid_t from = 0; userid_t to = 0; }; // PerformanceProfiler implements the I/O performance data collection module. class PerformanceProfiler final : public DataProcessorInterface, public PressureMonitorInterface::PressureChangeCallbackInterface { public: PerformanceProfiler( const android::sp& pressureMonitor, const std::function& getElapsedTimeSinceBootMillisFunc = &elapsedRealtime) : kPressureMonitor(pressureMonitor), kGetElapsedTimeSinceBootMillisFunc(getElapsedTimeSinceBootMillisFunc), mTopNStatsPerCategory(0), mTopNStatsPerSubcategory(0), mMaxUserSwitchEvents(0), mSystemEventDataCacheDurationSec(0), // TODO(b/333722043): Once carwatchdogd has sys_ptrace capability, set // mIsSmapsRollupSupported field from `android::meminfo::IsSmapsRollupSupported()`. // Disabling smaps_rollup support because this file cannot be read without sys_ptrace // capability. mIsSmapsRollupSupported(false), mIsMemoryProfilingEnabled(android::car::feature::car_watchdog_memory_profiling()), mBoottimeCollection({}), mPeriodicCollection({}), mUserSwitchCollections({}), mWakeUpCollection({}), mCustomCollection({}), mLastMajorFaults(0), mDoSendResourceUsageStats(false), mMemoryPressureLevelDeltaInfo(PressureLevelDeltaInfo(getElapsedTimeSinceBootMillisFunc)) { } ~PerformanceProfiler() { terminate(); } std::string name() const override { return "PerformanceProfiler"; } // Implements DataProcessorInterface. android::base::Result onSystemStartup() override; void onCarWatchdogServiceRegistered() override; android::base::Result onBoottimeCollection( time_point_millis time, const android::wp& uidStatsCollector, const android::wp& procStatCollector, aidl::android::automotive::watchdog::internal::ResourceStats* resourceStats) override; android::base::Result onWakeUpCollection( time_point_millis time, const android::wp& uidStatsCollector, const android::wp& procStatCollector) override; android::base::Result onPeriodicCollection( time_point_millis time, SystemState systemState, const android::wp& uidStatsCollector, const android::wp& procStatCollector, aidl::android::automotive::watchdog::internal::ResourceStats* resourceStats) override; android::base::Result onUserSwitchCollection( time_point_millis time, userid_t from, userid_t to, const android::wp& uidStatsCollector, const android::wp& procStatCollector) override; android::base::Result onCustomCollection( time_point_millis time, SystemState systemState, const std::unordered_set& filterPackages, const android::wp& uidStatsCollector, const android::wp& procStatCollector, aidl::android::automotive::watchdog::internal::ResourceStats* resourceStats) override; android::base::Result onPeriodicMonitor( [[maybe_unused]] time_t time, [[maybe_unused]] const android::wp& procDiskStatsCollector, [[maybe_unused]] const std::function& alertHandler) override { // No monitoring done here as this DataProcessor only collects I/O performance records. return {}; } android::base::Result onDump(int fd) const override; android::base::Result onDumpProto( const CollectionIntervals& collectionIntervals, android::util::ProtoOutputStream& outProto) const override; android::base::Result onCustomCollectionDump(int fd) override; void onPressureChanged(PressureMonitorInterface::PressureLevel) override; protected: android::base::Result init(); // Clears in-memory cache. void terminate(); private: class PressureLevelDeltaInfo { public: explicit PressureLevelDeltaInfo( const std::function& getElapsedTimeSinceBootMillisFunc) : kGetElapsedTimeSinceBootMillisFunc(getElapsedTimeSinceBootMillisFunc), mLatestPressureLevel(PressureMonitorInterface::PRESSURE_LEVEL_NONE), mLatestPressureLevelElapsedRealtimeMillis(getElapsedTimeSinceBootMillisFunc()) {} // Calculates the duration for the previously reported pressure level, updates it in // mPressureLevelDurations, and sets the latest pressure level and its elapsed realtime. void setLatestPressureLevelLocked(PressureMonitorInterface::PressureLevel pressureLevel); // Returns the latest pressure stats and flushes stats to mPressureLevelDurations. std::unordered_map onCollectionLocked(); private: // Updated by test for mocking elapsed time. const std::function kGetElapsedTimeSinceBootMillisFunc; // Latest pressure level reported by the PressureMonitor. PressureMonitorInterface::PressureLevel mLatestPressureLevel; // Time when the latest pressure level was recorded. Used to calculate // pressureLevelDurations. int64_t mLatestPressureLevelElapsedRealtimeMillis = 0; // Duration spent in different pressure levels since the last poll. std::unordered_map mPressureLevelDurations = {}; }; // Processes the collected data. android::base::Result processLocked( time_point_millis time, SystemState systemState, const std::unordered_set& filterPackages, const android::sp& uidStatsCollector, const android::sp& procStatCollector, CollectionInfo* collectionInfo, aidl::android::automotive::watchdog::internal::ResourceStats* resourceStats); // Processes per-UID performance data. void processUidStatsLocked( bool isGarageModeActive, int64_t totalCpuTimeMillis, const std::unordered_set& filterPackages, const android::sp& uidStatsCollector, std::vector* uidResourceUsageStats, UserPackageSummaryStats* userPackageSummaryStats); // Processes system performance data from the `/proc/stat` file. void processProcStatLocked(const android::sp& procStatCollector, SystemSummaryStats* systemSummaryStats) const; // Dump the user switch collection android::base::Result onUserSwitchCollectionDump(int fd) const; void clearExpiredSystemEventCollections(time_point_millis time); void dumpStatsRecordsProto(const CollectionInfo& collection, android::util::ProtoOutputStream& outProto) const; void dumpPackageCpuStatsProto(const std::vector& userPackageStats, android::util::ProtoOutputStream& outProto) const; void dumpPackageStorageIoStatsProto(const std::vector& userPackageStats, const uint64_t storageStatsFieldId, android::util::ProtoOutputStream& outProto) const; void dumpPackageTaskStateStatsProto(const std::vector& userPackageStats, const std::unordered_map& taskCountByUid, android::util::ProtoOutputStream& outProto) const; void dumpPackageMajorPageFaultsProto(const std::vector& userPackageStats, android::util::ProtoOutputStream& outProto) const; // Pressure monitor instance. const android::sp kPressureMonitor; // Updated by test for mocking elapsed time. const std::function kGetElapsedTimeSinceBootMillisFunc; // Top N per-UID stats per category. int mTopNStatsPerCategory; // Top N per-process stats per subcategory. int mTopNStatsPerSubcategory; // Max amount of user switch events cached in |mUserSwitchCollections|. size_t mMaxUserSwitchEvents; // Amount of seconds before a system event's cache is cleared. std::chrono::seconds mSystemEventDataCacheDurationSec; // Smaps rollup is supported by kernel or not. bool mIsSmapsRollupSupported; // Memory Profiling feature flag is enabled or not. bool mIsMemoryProfilingEnabled; // Makes sure only one collection is running at any given time. mutable Mutex mMutex; // Info for the boot-time collection event. The cache is persisted until system shutdown/reboot // or a wake-up collection occurs. CollectionInfo mBoottimeCollection GUARDED_BY(mMutex); // Info for the periodic collection event. The cache size is limited by // |ro.carwatchdog.periodic_collection_buffer_size|. CollectionInfo mPeriodicCollection GUARDED_BY(mMutex); // Cache for user switch collection events. Events are cached from oldest to newest. std::vector mUserSwitchCollections GUARDED_BY(mMutex); // Info for the wake-up collection event. Only the latest wake-up collection is cached. CollectionInfo mWakeUpCollection GUARDED_BY(mMutex); // Info for the custom collection event. The info is cleared at the end of every custom // collection. CollectionInfo mCustomCollection GUARDED_BY(mMutex); // Major faults delta from last collection. Useful when calculating the percentage change in // major faults since last collection. uint64_t mLastMajorFaults GUARDED_BY(mMutex); // Enables the sending of resource usage stats to CarService. bool mDoSendResourceUsageStats GUARDED_BY(mMutex); // Aggregated pressure level changes occurred since the last collection. PressureLevelDeltaInfo mMemoryPressureLevelDeltaInfo GUARDED_BY(mMutex); friend class WatchdogPerfService; // For unit tests. friend class internal::PerformanceProfilerPeer; }; } // namespace watchdog } // namespace automotive } // namespace android