1 // Copyright 2016 The Chromium Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef COMPONENTS_METRICS_FILE_METRICS_PROVIDER_H_ 6 #define COMPONENTS_METRICS_FILE_METRICS_PROVIDER_H_ 7 8 #include <stddef.h> 9 10 #include <list> 11 #include <memory> 12 #include <vector> 13 14 #include "base/files/file_path.h" 15 #include "base/functional/callback_forward.h" 16 #include "base/gtest_prod_util.h" 17 #include "base/memory/raw_ptr.h" 18 #include "base/memory/scoped_refptr.h" 19 #include "base/memory/weak_ptr.h" 20 #include "base/metrics/statistics_recorder.h" 21 #include "base/sequence_checker.h" 22 #include "base/time/time.h" 23 #include "components/metrics/metrics_provider.h" 24 25 class PrefRegistrySimple; 26 class PrefService; 27 28 namespace metrics { 29 30 // FileMetricsProvider gathers and logs histograms written to files on disk. 31 // Any number of files can be registered and will be polled once per upload 32 // cycle (at startup and periodically thereafter -- about every 30 minutes 33 // for desktop) for data to send. 34 class FileMetricsProvider : public MetricsProvider, 35 public base::StatisticsRecorder::HistogramProvider { 36 public: 37 struct Params; 38 39 enum SourceType { 40 // "Atomic" files are a collection of histograms that are written 41 // completely in a single atomic operation (typically a write followed 42 // by an atomic rename) and the file is never updated again except to 43 // be replaced by a completely new set of histograms. This is the only 44 // option that can be used if the file is not writeable by *this* 45 // process. Once the file has been read, an attempt will be made to 46 // delete it thus providing some measure of safety should different 47 // instantiations (such as by different users of a system-level install) 48 // try to read it. In case the delete operation fails, this class 49 // persistently tracks the last-modified time of the file so it will 50 // not be read a second time. 51 SOURCE_HISTOGRAMS_ATOMIC_FILE, 52 53 // A directory of atomic PMA files. This handles a directory in which 54 // files of metrics are atomically added. Only files ending with ".pma" 55 // will be read. They are read according to their last-modified time and 56 // never read more that once (unless they change). Only one file will 57 // be read per reporting cycle. Filenames that start with a dot (.) or 58 // an underscore (_) are ignored so temporary files (perhaps created by 59 // the ImportantFileWriter) will not get read. Files that have been 60 // read will be attempted to be deleted; should those files not be 61 // deletable by this process, it is the reponsibility of the producer 62 // to keep the directory pruned in some manner. Added files must have a 63 // timestamp later (not the same or earlier) than the newest file that 64 // already exists or it may be assumed to have been already uploaded. 65 SOURCE_HISTOGRAMS_ATOMIC_DIR, 66 67 // "Active" files may be open by one or more other processes and updated 68 // at any time with new samples or new histograms. Such files may also be 69 // inactive for any period of time only to be opened again and have new 70 // data written to them. The file should probably never be deleted because 71 // there would be no guarantee that the data has been reported. 72 SOURCE_HISTOGRAMS_ACTIVE_FILE, 73 }; 74 75 enum SourceAssociation { 76 // Associates the metrics in the file with the current run of the browser. 77 // The reporting will take place as part of the normal logging of 78 // histograms. 79 ASSOCIATE_CURRENT_RUN, 80 81 // Associates the metrics in the file with the previous run of the browesr. 82 // The reporting will take place as part of the "stability" histograms. 83 // This is important when metrics are dumped as part of a crash of the 84 // previous run. This can only be used with FILE_HISTOGRAMS_ATOMIC. 85 ASSOCIATE_PREVIOUS_RUN, 86 87 // Associates the metrics in the file with the a profile embedded in the 88 // same file. The reporting will take place at a convenient time after 89 // startup when the browser is otherwise idle. If there is no embedded 90 // system profile, these metrics will be lost. 91 ASSOCIATE_INTERNAL_PROFILE, 92 93 // Like above but fall back to ASSOCIATE_PREVIOUS_RUN if there is no 94 // embedded profile. This has a small cost during startup as that is 95 // when previous-run metrics are sent so the file has be checked at 96 // that time even though actual transfer will be delayed if an 97 // embedded profile is found. 98 ASSOCIATE_INTERNAL_PROFILE_OR_PREVIOUS_RUN, 99 100 // Used to only record the metadata of |ASSOCIATE_INTERNAL_PROFILE| but not 101 // merge the metrics. Instead, write metadata such as the samples count etc, 102 // to prefs then delete file. To precisely simulate the 103 // |ASSOCIATE_INTERNAL_PROFILE| behavior, one file record will be read out 104 // and added to the stability prefs each time the metrics service requests 105 // the |ASSOCIATE_INTERNAL_PROFILE| source metrics. Finally, the results 106 // will be recoreded as stability metrics in the next run. 107 ASSOCIATE_INTERNAL_PROFILE_SAMPLES_COUNTER, 108 }; 109 110 enum FilterAction { 111 // Process this file normally. 112 FILTER_PROCESS_FILE, 113 114 // This file is the active metrics file for the current process. Don't 115 // do anything with it. This is effectively "try later" but isn't 116 // added to the results histogram because the file has to be ignored 117 // throughout the life of the browser and that skews the distribution. 118 FILTER_ACTIVE_THIS_PID, 119 120 // Try again. This could happen within milliseconds or minutes but no other 121 // files from the same source will get processed in between. The process 122 // must have permission to "touch" the file and alter its last-modified 123 // time because files are always processed in order of those stamps. 124 FILTER_TRY_LATER, 125 126 // Skip this file. This file will not be processed until it has changed 127 // (i.e. had its last-modifided time updated). If it is "atomic", an 128 // attempt will be made to delete it. 129 FILTER_SKIP_FILE, 130 }; 131 132 // A "filter" can be defined to determine what to do on a per-file basis. 133 // This is called only after a file has been found to be the next one to 134 // be processed so it's okay if filter calls are relatively expensive. 135 // Calls are made on a background thread of low-priority and capable of 136 // doing I/O. 137 using FilterCallback = 138 base::RepeatingCallback<FilterAction(const base::FilePath& path)>; 139 140 // Parameters for RegisterSource, defined as a structure to allow new 141 // ones to be added (with default values) that doesn't require changes 142 // to all call sites. 143 struct Params { 144 Params(const base::FilePath& path, 145 SourceType type, 146 SourceAssociation association, 147 base::StringPiece prefs_key = base::StringPiece()); 148 149 ~Params(); 150 151 // The standard parameters, set during construction. 152 const base::FilePath path; 153 const SourceType type; 154 const SourceAssociation association; 155 const base::StringPiece prefs_key; 156 157 // Other parameters that can be set after construction. 158 FilterCallback filter; // Run-time check for what to do with file. 159 base::TimeDelta max_age; // Maximum age of a file (0=unlimited). 160 size_t max_dir_kib = 0; // Maximum bytes in a directory (0=inf). 161 size_t max_dir_files = 100; // Maximum files in a directory (0=inf). 162 }; 163 164 explicit FileMetricsProvider(PrefService* local_state); 165 166 FileMetricsProvider(const FileMetricsProvider&) = delete; 167 FileMetricsProvider& operator=(const FileMetricsProvider&) = delete; 168 169 ~FileMetricsProvider() override; 170 171 // Indicates a file or directory to be monitored and how the file or files 172 // within that directory are used. Because some metadata may need to persist 173 // across process restarts, preferences entries are used based on the 174 // |prefs_key| name. Call RegisterSourcePrefs() with the same name to create 175 // the necessary keys in advance. Set |prefs_key| empty (nullptr will work) if 176 // no persistence is required. ACTIVE files shouldn't have a pref key as 177 // they update internal state about what has been previously sent. 178 void RegisterSource(const Params& params); 179 180 // Registers all necessary preferences for maintaining persistent state 181 // about a monitored file across process restarts. The |prefs_key| is 182 // typically the filename. 183 static void RegisterSourcePrefs(PrefRegistrySimple* prefs, 184 const base::StringPiece prefs_key); 185 186 static void RegisterPrefs(PrefRegistrySimple* prefs); 187 188 private: 189 friend class FileMetricsProviderTest; 190 friend class TestFileMetricsProvider; 191 192 // The different results that can occur accessing a file. 193 enum AccessResult { 194 // File was successfully mapped. 195 ACCESS_RESULT_SUCCESS, 196 197 // File does not exist. 198 ACCESS_RESULT_DOESNT_EXIST, 199 200 // File exists but not modified since last read. 201 ACCESS_RESULT_NOT_MODIFIED, 202 203 // File is not valid: is a directory or zero-size. 204 ACCESS_RESULT_INVALID_FILE, 205 206 // System could not map file into memory. 207 ACCESS_RESULT_SYSTEM_MAP_FAILURE, 208 209 // File had invalid contents. 210 ACCESS_RESULT_INVALID_CONTENTS, 211 212 // File could not be opened. 213 ACCESS_RESULT_NO_OPEN, 214 215 // File contents were internally deleted. 216 ACCESS_RESULT_MEMORY_DELETED, 217 218 // File is scheduled to be tried again later. 219 ACCESS_RESULT_FILTER_TRY_LATER, 220 221 // File was skipped according to filtering rules. 222 ACCESS_RESULT_FILTER_SKIP_FILE, 223 224 // File was skipped because it exceeds the maximum age. 225 ACCESS_RESULT_TOO_OLD, 226 227 // File was skipped because too many files in directory. 228 ACCESS_RESULT_TOO_MANY_FILES, 229 230 // File was skipped because too many bytes in directory. 231 ACCESS_RESULT_TOO_MANY_BYTES, 232 233 // The file was skipped because it's being written by this process. 234 ACCESS_RESULT_THIS_PID, 235 236 // The file had no embedded system profile. 237 ACCESS_RESULT_NO_PROFILE, 238 239 // The file had internal data corruption. 240 ACCESS_RESULT_DATA_CORRUPTION, 241 242 // The file is not writable when it should be. 243 ACCESS_RESULT_NOT_WRITABLE, 244 245 ACCESS_RESULT_MAX 246 }; 247 248 // Information about sources being monitored; defined and used exclusively 249 // inside the .cc file. 250 struct SourceInfo; 251 using SourceInfoList = std::list<std::unique_ptr<SourceInfo>>; 252 253 // Records an access result in a histogram. 254 static void RecordAccessResult(AccessResult result); 255 256 // Looks for the next file to read within a directory. Returns true if a 257 // file was found. This is part of CheckAndMapNewMetricSourcesOnTaskRunner 258 // and so runs on an thread capable of I/O. The |source| structure will 259 // be internally updated to indicate the next file to be read. 260 static bool LocateNextFileInDirectory(SourceInfo* source); 261 262 // Handles the completion of a source. 263 static void FinishedWithSource(SourceInfo* source, AccessResult result); 264 265 // Checks a list of sources (on a task-runner allowed to do I/O) and merge 266 // any data found within them. 267 // Returns a list of histogram sample counts for sources of type 268 // ASSOCIATE_INTERNAL_PROFILE_SAMPLES_COUNTER that were processed. 269 static std::vector<size_t> CheckAndMergeMetricSourcesOnTaskRunner( 270 SourceInfoList* sources); 271 272 // Checks a single source and maps it into memory. 273 static AccessResult CheckAndMapMetricSource(SourceInfo* source); 274 275 // Merges all of the histograms from a |source| to the StatisticsRecorder. 276 // Returns the number of histograms merged. 277 static size_t MergeHistogramDeltasFromSource(SourceInfo* source); 278 279 // Records all histograms from a given source via a snapshot-manager. Only the 280 // histograms that have |required_flags| will be recorded. 281 static void RecordHistogramSnapshotsFromSource( 282 base::HistogramSnapshotManager* snapshot_manager, 283 SourceInfo* source, 284 base::HistogramBase::Flags required_flags); 285 286 // Calls source filter (if any) and returns the desired action. 287 static AccessResult HandleFilterSource(SourceInfo* source, 288 const base::FilePath& path); 289 290 // The part of ProvideIndependentMetrics that runs as a background task. 291 static bool ProvideIndependentMetricsOnTaskRunner( 292 SourceInfo* source, 293 ChromeUserMetricsExtension* uma_proto, 294 base::HistogramSnapshotManager* snapshot_manager, 295 base::OnceClosure serialize_log_callback); 296 297 // Collects the metadata of the |source|. 298 // Returns the number of histogram samples from that source. 299 static size_t CollectFileMetadataFromSource(SourceInfo* source); 300 301 // Appends the samples count to pref on UI thread. 302 void AppendToSamplesCountPref(std::vector<size_t> samples_count); 303 304 // Creates a task to check all monitored sources for updates. 305 void ScheduleSourcesCheck(); 306 307 // Takes a list of sources checked by an external task and determines what 308 // to do with each. Virtual for testing. 309 virtual void RecordSourcesChecked(SourceInfoList* checked, 310 std::vector<size_t> samples_counts); 311 312 // Schedules the deletion of a file in the background using the task-runner. 313 void DeleteFileAsync(const base::FilePath& path); 314 315 // Updates the persistent state information to show a source as being read. 316 void RecordSourceAsRead(SourceInfo* source); 317 318 // metrics::MetricsProvider: 319 void OnDidCreateMetricsLog() override; 320 bool HasIndependentMetrics() override; 321 void ProvideIndependentMetrics( 322 base::OnceClosure serialize_log_callback, 323 base::OnceCallback<void(bool)> done_callback, 324 ChromeUserMetricsExtension* uma_proto, 325 base::HistogramSnapshotManager* snapshot_manager) override; 326 bool HasPreviousSessionData() override; 327 void RecordInitialHistogramSnapshots( 328 base::HistogramSnapshotManager* snapshot_manager) override; 329 330 // base::StatisticsRecorder::HistogramProvider: 331 void MergeHistogramDeltas(bool async, 332 base::OnceClosure done_callback) override; 333 334 // The part of ProvideIndependentMetrics that runs after background task. 335 void ProvideIndependentMetricsCleanup( 336 base::OnceCallback<void(bool)> done_callback, 337 std::unique_ptr<SourceInfo> source, 338 bool success); 339 340 // Simulates the independent metrics to read the first item from 341 // kMetricsBrowserMetricsMetadata and updates the stability prefs accordingly, 342 // return true if the pref isn't empty. 343 bool SimulateIndependentMetrics(); 344 345 // A list of sources not currently active that need to be checked for changes. 346 SourceInfoList sources_to_check_; 347 348 // A list of currently active sources to be merged when required. 349 SourceInfoList sources_mapped_; 350 351 // A list of currently active sources to be merged when required. 352 SourceInfoList sources_with_profile_; 353 354 // A list of sources for a previous run. These are held separately because 355 // they are not subject to the periodic background checking that handles 356 // metrics for the current run. 357 SourceInfoList sources_for_previous_run_; 358 359 // The preferences-service used to store persistent state about sources. 360 raw_ptr<PrefService> pref_service_; 361 362 SEQUENCE_CHECKER(sequence_checker_); 363 base::WeakPtrFactory<FileMetricsProvider> weak_factory_{this}; 364 }; 365 366 } // namespace metrics 367 368 #endif // COMPONENTS_METRICS_FILE_METRICS_PROVIDER_H_ 369