// Copyright 2016 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifndef COMPONENTS_METRICS_FILE_METRICS_PROVIDER_H_ #define COMPONENTS_METRICS_FILE_METRICS_PROVIDER_H_ #include #include #include #include #include "base/files/file_path.h" #include "base/functional/callback_forward.h" #include "base/gtest_prod_util.h" #include "base/memory/raw_ptr.h" #include "base/memory/scoped_refptr.h" #include "base/memory/weak_ptr.h" #include "base/metrics/statistics_recorder.h" #include "base/sequence_checker.h" #include "base/time/time.h" #include "components/metrics/metrics_provider.h" class PrefRegistrySimple; class PrefService; namespace metrics { // FileMetricsProvider gathers and logs histograms written to files on disk. // Any number of files can be registered and will be polled once per upload // cycle (at startup and periodically thereafter -- about every 30 minutes // for desktop) for data to send. class FileMetricsProvider : public MetricsProvider, public base::StatisticsRecorder::HistogramProvider { public: struct Params; enum SourceType { // "Atomic" files are a collection of histograms that are written // completely in a single atomic operation (typically a write followed // by an atomic rename) and the file is never updated again except to // be replaced by a completely new set of histograms. This is the only // option that can be used if the file is not writeable by *this* // process. Once the file has been read, an attempt will be made to // delete it thus providing some measure of safety should different // instantiations (such as by different users of a system-level install) // try to read it. In case the delete operation fails, this class // persistently tracks the last-modified time of the file so it will // not be read a second time. SOURCE_HISTOGRAMS_ATOMIC_FILE, // A directory of atomic PMA files. This handles a directory in which // files of metrics are atomically added. Only files ending with ".pma" // will be read. They are read according to their last-modified time and // never read more that once (unless they change). Only one file will // be read per reporting cycle. Filenames that start with a dot (.) or // an underscore (_) are ignored so temporary files (perhaps created by // the ImportantFileWriter) will not get read. Files that have been // read will be attempted to be deleted; should those files not be // deletable by this process, it is the reponsibility of the producer // to keep the directory pruned in some manner. Added files must have a // timestamp later (not the same or earlier) than the newest file that // already exists or it may be assumed to have been already uploaded. SOURCE_HISTOGRAMS_ATOMIC_DIR, // "Active" files may be open by one or more other processes and updated // at any time with new samples or new histograms. Such files may also be // inactive for any period of time only to be opened again and have new // data written to them. The file should probably never be deleted because // there would be no guarantee that the data has been reported. SOURCE_HISTOGRAMS_ACTIVE_FILE, }; enum SourceAssociation { // Associates the metrics in the file with the current run of the browser. // The reporting will take place as part of the normal logging of // histograms. ASSOCIATE_CURRENT_RUN, // Associates the metrics in the file with the previous run of the browesr. // The reporting will take place as part of the "stability" histograms. // This is important when metrics are dumped as part of a crash of the // previous run. This can only be used with FILE_HISTOGRAMS_ATOMIC. ASSOCIATE_PREVIOUS_RUN, // Associates the metrics in the file with the a profile embedded in the // same file. The reporting will take place at a convenient time after // startup when the browser is otherwise idle. If there is no embedded // system profile, these metrics will be lost. ASSOCIATE_INTERNAL_PROFILE, // Like above but fall back to ASSOCIATE_PREVIOUS_RUN if there is no // embedded profile. This has a small cost during startup as that is // when previous-run metrics are sent so the file has be checked at // that time even though actual transfer will be delayed if an // embedded profile is found. ASSOCIATE_INTERNAL_PROFILE_OR_PREVIOUS_RUN, // Used to only record the metadata of |ASSOCIATE_INTERNAL_PROFILE| but not // merge the metrics. Instead, write metadata such as the samples count etc, // to prefs then delete file. To precisely simulate the // |ASSOCIATE_INTERNAL_PROFILE| behavior, one file record will be read out // and added to the stability prefs each time the metrics service requests // the |ASSOCIATE_INTERNAL_PROFILE| source metrics. Finally, the results // will be recoreded as stability metrics in the next run. ASSOCIATE_INTERNAL_PROFILE_SAMPLES_COUNTER, }; enum FilterAction { // Process this file normally. FILTER_PROCESS_FILE, // This file is the active metrics file for the current process. Don't // do anything with it. This is effectively "try later" but isn't // added to the results histogram because the file has to be ignored // throughout the life of the browser and that skews the distribution. FILTER_ACTIVE_THIS_PID, // Try again. This could happen within milliseconds or minutes but no other // files from the same source will get processed in between. The process // must have permission to "touch" the file and alter its last-modified // time because files are always processed in order of those stamps. FILTER_TRY_LATER, // Skip this file. This file will not be processed until it has changed // (i.e. had its last-modifided time updated). If it is "atomic", an // attempt will be made to delete it. FILTER_SKIP_FILE, }; // A "filter" can be defined to determine what to do on a per-file basis. // This is called only after a file has been found to be the next one to // be processed so it's okay if filter calls are relatively expensive. // Calls are made on a background thread of low-priority and capable of // doing I/O. using FilterCallback = base::RepeatingCallback; // Parameters for RegisterSource, defined as a structure to allow new // ones to be added (with default values) that doesn't require changes // to all call sites. struct Params { Params(const base::FilePath& path, SourceType type, SourceAssociation association, base::StringPiece prefs_key = base::StringPiece()); ~Params(); // The standard parameters, set during construction. const base::FilePath path; const SourceType type; const SourceAssociation association; const base::StringPiece prefs_key; // Other parameters that can be set after construction. FilterCallback filter; // Run-time check for what to do with file. base::TimeDelta max_age; // Maximum age of a file (0=unlimited). size_t max_dir_kib = 0; // Maximum bytes in a directory (0=inf). size_t max_dir_files = 100; // Maximum files in a directory (0=inf). }; explicit FileMetricsProvider(PrefService* local_state); FileMetricsProvider(const FileMetricsProvider&) = delete; FileMetricsProvider& operator=(const FileMetricsProvider&) = delete; ~FileMetricsProvider() override; // Indicates a file or directory to be monitored and how the file or files // within that directory are used. Because some metadata may need to persist // across process restarts, preferences entries are used based on the // |prefs_key| name. Call RegisterSourcePrefs() with the same name to create // the necessary keys in advance. Set |prefs_key| empty (nullptr will work) if // no persistence is required. ACTIVE files shouldn't have a pref key as // they update internal state about what has been previously sent. void RegisterSource(const Params& params); // Registers all necessary preferences for maintaining persistent state // about a monitored file across process restarts. The |prefs_key| is // typically the filename. static void RegisterSourcePrefs(PrefRegistrySimple* prefs, const base::StringPiece prefs_key); static void RegisterPrefs(PrefRegistrySimple* prefs); private: friend class FileMetricsProviderTest; friend class TestFileMetricsProvider; // The different results that can occur accessing a file. enum AccessResult { // File was successfully mapped. ACCESS_RESULT_SUCCESS, // File does not exist. ACCESS_RESULT_DOESNT_EXIST, // File exists but not modified since last read. ACCESS_RESULT_NOT_MODIFIED, // File is not valid: is a directory or zero-size. ACCESS_RESULT_INVALID_FILE, // System could not map file into memory. ACCESS_RESULT_SYSTEM_MAP_FAILURE, // File had invalid contents. ACCESS_RESULT_INVALID_CONTENTS, // File could not be opened. ACCESS_RESULT_NO_OPEN, // File contents were internally deleted. ACCESS_RESULT_MEMORY_DELETED, // File is scheduled to be tried again later. ACCESS_RESULT_FILTER_TRY_LATER, // File was skipped according to filtering rules. ACCESS_RESULT_FILTER_SKIP_FILE, // File was skipped because it exceeds the maximum age. ACCESS_RESULT_TOO_OLD, // File was skipped because too many files in directory. ACCESS_RESULT_TOO_MANY_FILES, // File was skipped because too many bytes in directory. ACCESS_RESULT_TOO_MANY_BYTES, // The file was skipped because it's being written by this process. ACCESS_RESULT_THIS_PID, // The file had no embedded system profile. ACCESS_RESULT_NO_PROFILE, // The file had internal data corruption. ACCESS_RESULT_DATA_CORRUPTION, // The file is not writable when it should be. ACCESS_RESULT_NOT_WRITABLE, ACCESS_RESULT_MAX }; // Information about sources being monitored; defined and used exclusively // inside the .cc file. struct SourceInfo; using SourceInfoList = std::list>; // Records an access result in a histogram. static void RecordAccessResult(AccessResult result); // Looks for the next file to read within a directory. Returns true if a // file was found. This is part of CheckAndMapNewMetricSourcesOnTaskRunner // and so runs on an thread capable of I/O. The |source| structure will // be internally updated to indicate the next file to be read. static bool LocateNextFileInDirectory(SourceInfo* source); // Handles the completion of a source. static void FinishedWithSource(SourceInfo* source, AccessResult result); // Checks a list of sources (on a task-runner allowed to do I/O) and merge // any data found within them. // Returns a list of histogram sample counts for sources of type // ASSOCIATE_INTERNAL_PROFILE_SAMPLES_COUNTER that were processed. static std::vector CheckAndMergeMetricSourcesOnTaskRunner( SourceInfoList* sources); // Checks a single source and maps it into memory. static AccessResult CheckAndMapMetricSource(SourceInfo* source); // Merges all of the histograms from a |source| to the StatisticsRecorder. // Returns the number of histograms merged. static size_t MergeHistogramDeltasFromSource(SourceInfo* source); // Records all histograms from a given source via a snapshot-manager. Only the // histograms that have |required_flags| will be recorded. static void RecordHistogramSnapshotsFromSource( base::HistogramSnapshotManager* snapshot_manager, SourceInfo* source, base::HistogramBase::Flags required_flags); // Calls source filter (if any) and returns the desired action. static AccessResult HandleFilterSource(SourceInfo* source, const base::FilePath& path); // The part of ProvideIndependentMetrics that runs as a background task. static bool ProvideIndependentMetricsOnTaskRunner( SourceInfo* source, ChromeUserMetricsExtension* uma_proto, base::HistogramSnapshotManager* snapshot_manager, base::OnceClosure serialize_log_callback); // Collects the metadata of the |source|. // Returns the number of histogram samples from that source. static size_t CollectFileMetadataFromSource(SourceInfo* source); // Appends the samples count to pref on UI thread. void AppendToSamplesCountPref(std::vector samples_count); // Creates a task to check all monitored sources for updates. void ScheduleSourcesCheck(); // Takes a list of sources checked by an external task and determines what // to do with each. Virtual for testing. virtual void RecordSourcesChecked(SourceInfoList* checked, std::vector samples_counts); // Schedules the deletion of a file in the background using the task-runner. void DeleteFileAsync(const base::FilePath& path); // Updates the persistent state information to show a source as being read. void RecordSourceAsRead(SourceInfo* source); // metrics::MetricsProvider: void OnDidCreateMetricsLog() override; bool HasIndependentMetrics() override; void ProvideIndependentMetrics( base::OnceClosure serialize_log_callback, base::OnceCallback done_callback, ChromeUserMetricsExtension* uma_proto, base::HistogramSnapshotManager* snapshot_manager) override; bool HasPreviousSessionData() override; void RecordInitialHistogramSnapshots( base::HistogramSnapshotManager* snapshot_manager) override; // base::StatisticsRecorder::HistogramProvider: void MergeHistogramDeltas(bool async, base::OnceClosure done_callback) override; // The part of ProvideIndependentMetrics that runs after background task. void ProvideIndependentMetricsCleanup( base::OnceCallback done_callback, std::unique_ptr source, bool success); // Simulates the independent metrics to read the first item from // kMetricsBrowserMetricsMetadata and updates the stability prefs accordingly, // return true if the pref isn't empty. bool SimulateIndependentMetrics(); // A list of sources not currently active that need to be checked for changes. SourceInfoList sources_to_check_; // A list of currently active sources to be merged when required. SourceInfoList sources_mapped_; // A list of currently active sources to be merged when required. SourceInfoList sources_with_profile_; // A list of sources for a previous run. These are held separately because // they are not subject to the periodic background checking that handles // metrics for the current run. SourceInfoList sources_for_previous_run_; // The preferences-service used to store persistent state about sources. raw_ptr pref_service_; SEQUENCE_CHECKER(sequence_checker_); base::WeakPtrFactory weak_factory_{this}; }; } // namespace metrics #endif // COMPONENTS_METRICS_FILE_METRICS_PROVIDER_H_