xref: /aosp_15_r20/external/cronet/components/metrics/file_metrics_provider.h (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2016 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef COMPONENTS_METRICS_FILE_METRICS_PROVIDER_H_
6 #define COMPONENTS_METRICS_FILE_METRICS_PROVIDER_H_
7 
8 #include <stddef.h>
9 
10 #include <list>
11 #include <memory>
12 #include <vector>
13 
14 #include "base/files/file_path.h"
15 #include "base/functional/callback_forward.h"
16 #include "base/gtest_prod_util.h"
17 #include "base/memory/raw_ptr.h"
18 #include "base/memory/scoped_refptr.h"
19 #include "base/memory/weak_ptr.h"
20 #include "base/metrics/statistics_recorder.h"
21 #include "base/sequence_checker.h"
22 #include "base/time/time.h"
23 #include "components/metrics/metrics_provider.h"
24 
25 class PrefRegistrySimple;  // Parameter of the Register*Prefs() methods.
26 class PrefService;  // Taken by the constructor; kept in |pref_service_|.
27 
28 namespace metrics {
29 
30 // FileMetricsProvider gathers and logs histograms written to files on disk.
31 // Any number of files can be registered and will be polled once per upload
32 // cycle (at startup and periodically thereafter -- about every 30 minutes
33 // for desktop) for data to send.
34 class FileMetricsProvider : public MetricsProvider,
35                             public base::StatisticsRecorder::HistogramProvider {
36  public:
37   struct Params;
38 
39   enum SourceType {
40     // "Atomic" files are a collection of histograms that are written
41     // completely in a single atomic operation (typically a write followed
42     // by an atomic rename) and the file is never updated again except to
43     // be replaced by a completely new set of histograms. This is the only
44     // option that can be used if the file is not writeable by *this*
45     // process. Once the file has been read, an attempt will be made to
46     // delete it thus providing some measure of safety should different
47     // instantiations (such as by different users of a system-level install)
48     // try to read it. In case the delete operation fails, this class
49     // persistently tracks the last-modified time of the file so it will
50     // not be read a second time.
51     SOURCE_HISTOGRAMS_ATOMIC_FILE,
52 
53     // A directory of atomic PMA files. This handles a directory in which
54     // files of metrics are atomically added. Only files ending with ".pma"
55     // will be read. They are read according to their last-modified time and
56     // never read more than once (unless they change). Only one file will
57     // be read per reporting cycle. Filenames that start with a dot (.) or
58     // an underscore (_) are ignored so temporary files (perhaps created by
59     // the ImportantFileWriter) will not get read. Files that have been
60     // read will be attempted to be deleted; should those files not be
61     // deletable by this process, it is the responsibility of the producer
62     // to keep the directory pruned in some manner. Added files must have a
63     // timestamp later (not the same or earlier) than the newest file that
64     // already exists or it may be assumed to have been already uploaded.
65     SOURCE_HISTOGRAMS_ATOMIC_DIR,
66 
67     // "Active" files may be open by one or more other processes and updated
68     // at any time with new samples or new histograms. Such files may also be
69     // inactive for any period of time only to be opened again and have new
70     // data written to them. The file should probably never be deleted because
71     // there would be no guarantee that the data has been reported.
72     SOURCE_HISTOGRAMS_ACTIVE_FILE,
73   };
74 
75   enum SourceAssociation {
76     // Associates the metrics in the file with the current run of the browser.
77     // The reporting will take place as part of the normal logging of
78     // histograms.
79     ASSOCIATE_CURRENT_RUN,
80 
81     // Associates the metrics in the file with the previous run of the browser.
82     // The reporting will take place as part of the "stability" histograms.
83     // This is important when metrics are dumped as part of a crash of the
84     // previous run. This can only be used with SOURCE_HISTOGRAMS_ATOMIC_FILE.
85     ASSOCIATE_PREVIOUS_RUN,
86 
87     // Associates the metrics in the file with a profile embedded in the
88     // same file. The reporting will take place at a convenient time after
89     // startup when the browser is otherwise idle. If there is no embedded
90     // system profile, these metrics will be lost.
91     ASSOCIATE_INTERNAL_PROFILE,
92 
93     // Like above but fall back to ASSOCIATE_PREVIOUS_RUN if there is no
94     // embedded profile. This has a small cost during startup as that is
95     // when previous-run metrics are sent so the file has to be checked at
96     // that time even though actual transfer will be delayed if an
97     // embedded profile is found.
98     ASSOCIATE_INTERNAL_PROFILE_OR_PREVIOUS_RUN,
99 
100     // Used to only record the metadata of |ASSOCIATE_INTERNAL_PROFILE| but not
101     // merge the metrics. Instead, metadata (such as the samples count) is
102     // written to prefs and the file is deleted. To precisely simulate the
103     // |ASSOCIATE_INTERNAL_PROFILE| behavior, one file record will be read out
104     // and added to the stability prefs each time the metrics service requests
105     // the |ASSOCIATE_INTERNAL_PROFILE| source metrics. Finally, the results
106     // will be recorded as stability metrics in the next run.
107     ASSOCIATE_INTERNAL_PROFILE_SAMPLES_COUNTER,
108   };
109 
110   enum FilterAction {
111     // Process this file normally.
112     FILTER_PROCESS_FILE,
113 
114     // This file is the active metrics file for the current process.  Don't
115     // do anything with it. This is effectively "try later" but isn't
116     // added to the results histogram because the file has to be ignored
117     // throughout the life of the browser and that skews the distribution.
118     FILTER_ACTIVE_THIS_PID,
119 
120     // Try again. This could happen within milliseconds or minutes but no other
121     // files from the same source will get processed in between. The process
122     // must have permission to "touch" the file and alter its last-modified
123     // time because files are always processed in order of those stamps.
124     FILTER_TRY_LATER,
125 
126     // Skip this file. This file will not be processed until it has changed
127     // (i.e. had its last-modified time updated). If it is "atomic", an
128     // attempt will be made to delete it.
129     FILTER_SKIP_FILE,
130   };
131 
132   // A "filter" can be defined to determine what to do on a per-file basis.
133   // This is called only after a file has been found to be the next one to
134   // be processed so it's okay if filter calls are relatively expensive.
135   // Calls are made on a background thread of low-priority and capable of
136   // doing I/O.
137   using FilterCallback =
138       base::RepeatingCallback<FilterAction(const base::FilePath& path)>;
139 
140   // Parameters for RegisterSource, defined as a structure so that new
141   // ones can be added (with default values) without requiring changes
142   // to all call sites.
143   struct Params {
144     Params(const base::FilePath& path,
145            SourceType type,
146            SourceAssociation association,
147            base::StringPiece prefs_key = base::StringPiece());
148 
149     ~Params();
150 
151     // The standard parameters, set during construction.
152     const base::FilePath path;
153     const SourceType type;
154     const SourceAssociation association;
155     const base::StringPiece prefs_key;
156 
157     // Other parameters that can be set after construction.
158     FilterCallback filter;       // Run-time check for what to do with file.
159     base::TimeDelta max_age;     // Maximum age of a file (0=unlimited).
160     size_t max_dir_kib = 0;      // Maximum KiB in a directory (0=inf).
161     size_t max_dir_files = 100;  // Maximum files in a directory (0=inf).
162   };
163 
164   explicit FileMetricsProvider(PrefService* local_state);
165 
166   FileMetricsProvider(const FileMetricsProvider&) = delete;
167   FileMetricsProvider& operator=(const FileMetricsProvider&) = delete;
168 
169   ~FileMetricsProvider() override;
170 
171   // Indicates a file or directory to be monitored and how the file or files
172   // within that directory are used. Because some metadata may need to persist
173   // across process restarts, preferences entries are used based on the
174   // |prefs_key| name. Call RegisterSourcePrefs() with the same name to create
175   // the necessary keys in advance. Set |prefs_key| empty (nullptr will work) if
176   // no persistence is required. ACTIVE files shouldn't have a pref key as
177   // they update internal state about what has been previously sent.
178   void RegisterSource(const Params& params);
179 
180   // Registers all necessary preferences for maintaining persistent state
181   // about a monitored file across process restarts. The |prefs_key| is
182   // typically the filename.
183   static void RegisterSourcePrefs(PrefRegistrySimple* prefs,
184                                   const base::StringPiece prefs_key);
185 
186   static void RegisterPrefs(PrefRegistrySimple* prefs);
187 
188  private:
189   friend class FileMetricsProviderTest;
190   friend class TestFileMetricsProvider;
191 
192   // The different results that can occur accessing a file.
193   enum AccessResult {
194     // File was successfully mapped.
195     ACCESS_RESULT_SUCCESS,
196 
197     // File does not exist.
198     ACCESS_RESULT_DOESNT_EXIST,
199 
200     // File exists but not modified since last read.
201     ACCESS_RESULT_NOT_MODIFIED,
202 
203     // File is not valid: is a directory or zero-size.
204     ACCESS_RESULT_INVALID_FILE,
205 
206     // System could not map file into memory.
207     ACCESS_RESULT_SYSTEM_MAP_FAILURE,
208 
209     // File had invalid contents.
210     ACCESS_RESULT_INVALID_CONTENTS,
211 
212     // File could not be opened.
213     ACCESS_RESULT_NO_OPEN,
214 
215     // File contents were internally deleted.
216     ACCESS_RESULT_MEMORY_DELETED,
217 
218     // File is scheduled to be tried again later.
219     ACCESS_RESULT_FILTER_TRY_LATER,
220 
221     // File was skipped according to filtering rules.
222     ACCESS_RESULT_FILTER_SKIP_FILE,
223 
224     // File was skipped because it exceeds the maximum age.
225     ACCESS_RESULT_TOO_OLD,
226 
227     // File was skipped because too many files in directory.
228     ACCESS_RESULT_TOO_MANY_FILES,
229 
230     // File was skipped because too many bytes in directory.
231     ACCESS_RESULT_TOO_MANY_BYTES,
232 
233     // The file was skipped because it's being written by this process.
234     ACCESS_RESULT_THIS_PID,
235 
236     // The file had no embedded system profile.
237     ACCESS_RESULT_NO_PROFILE,
238 
239     // The file had internal data corruption.
240     ACCESS_RESULT_DATA_CORRUPTION,
241 
242     // The file is not writable when it should be.
243     ACCESS_RESULT_NOT_WRITABLE,
244 
245     ACCESS_RESULT_MAX
246   };
247 
248   // Information about sources being monitored; defined and used exclusively
249   // inside the .cc file.
250   struct SourceInfo;
251   using SourceInfoList = std::list<std::unique_ptr<SourceInfo>>;
252 
253   // Records an access result in a histogram.
254   static void RecordAccessResult(AccessResult result);
255 
256   // Looks for the next file to read within a directory. Returns true if a
257   // file was found. This is part of CheckAndMergeMetricSourcesOnTaskRunner
258   // and so runs on a thread capable of I/O. The |source| structure will
259   // be internally updated to indicate the next file to be read.
260   static bool LocateNextFileInDirectory(SourceInfo* source);
261 
262   // Handles the completion of a source.
263   static void FinishedWithSource(SourceInfo* source, AccessResult result);
264 
265   // Checks a list of sources (on a task-runner allowed to do I/O) and merges
266   // any data found within them.
267   // Returns a list of histogram sample counts for sources of type
268   // ASSOCIATE_INTERNAL_PROFILE_SAMPLES_COUNTER that were processed.
269   static std::vector<size_t> CheckAndMergeMetricSourcesOnTaskRunner(
270       SourceInfoList* sources);
271 
272   // Checks a single source and maps it into memory.
273   static AccessResult CheckAndMapMetricSource(SourceInfo* source);
274 
275   // Merges all of the histograms from a |source| to the StatisticsRecorder.
276   // Returns the number of histograms merged.
277   static size_t MergeHistogramDeltasFromSource(SourceInfo* source);
278 
279   // Records all histograms from a given source via a snapshot-manager. Only the
280   // histograms that have |required_flags| will be recorded.
281   static void RecordHistogramSnapshotsFromSource(
282       base::HistogramSnapshotManager* snapshot_manager,
283       SourceInfo* source,
284       base::HistogramBase::Flags required_flags);
285 
286   // Calls source filter (if any) and returns the desired action.
287   static AccessResult HandleFilterSource(SourceInfo* source,
288                                          const base::FilePath& path);
289 
290   // The part of ProvideIndependentMetrics that runs as a background task.
291   static bool ProvideIndependentMetricsOnTaskRunner(
292       SourceInfo* source,
293       ChromeUserMetricsExtension* uma_proto,
294       base::HistogramSnapshotManager* snapshot_manager,
295       base::OnceClosure serialize_log_callback);
296 
297   // Collects the metadata of the |source|.
298   // Returns the number of histogram samples from that source.
299   static size_t CollectFileMetadataFromSource(SourceInfo* source);
300 
301   // Appends the samples count to pref on UI thread.
302   void AppendToSamplesCountPref(std::vector<size_t> samples_count);
303 
304   // Creates a task to check all monitored sources for updates.
305   void ScheduleSourcesCheck();
306 
307   // Takes a list of sources checked by an external task and determines what
308   // to do with each. Virtual for testing.
309   virtual void RecordSourcesChecked(SourceInfoList* checked,
310                                     std::vector<size_t> samples_counts);
311 
312   // Schedules the deletion of a file in the background using the task-runner.
313   void DeleteFileAsync(const base::FilePath& path);
314 
315   // Updates the persistent state information to show a source as being read.
316   void RecordSourceAsRead(SourceInfo* source);
317 
318   // metrics::MetricsProvider:
319   void OnDidCreateMetricsLog() override;
320   bool HasIndependentMetrics() override;
321   void ProvideIndependentMetrics(
322       base::OnceClosure serialize_log_callback,
323       base::OnceCallback<void(bool)> done_callback,
324       ChromeUserMetricsExtension* uma_proto,
325       base::HistogramSnapshotManager* snapshot_manager) override;
326   bool HasPreviousSessionData() override;
327   void RecordInitialHistogramSnapshots(
328       base::HistogramSnapshotManager* snapshot_manager) override;
329 
330   // base::StatisticsRecorder::HistogramProvider:
331   void MergeHistogramDeltas(bool async,
332                             base::OnceClosure done_callback) override;
333 
334   // The part of ProvideIndependentMetrics that runs after background task.
335   void ProvideIndependentMetricsCleanup(
336       base::OnceCallback<void(bool)> done_callback,
337       std::unique_ptr<SourceInfo> source,
338       bool success);
339 
340   // Simulates the independent metrics to read the first item from
341   // kMetricsBrowserMetricsMetadata and updates the stability prefs accordingly.
342   // Returns true if the pref isn't empty.
343   bool SimulateIndependentMetrics();
344 
345   // A list of sources not currently active that need to be checked for changes.
346   SourceInfoList sources_to_check_;
347 
348   // A list of currently active sources to be merged when required.
349   SourceInfoList sources_mapped_;
350 
351   // NOTE(review): presumably sources with an embedded profile -- confirm.
352   SourceInfoList sources_with_profile_;
353 
354   // A list of sources for a previous run. These are held separately because
355   // they are not subject to the periodic background checking that handles
356   // metrics for the current run.
357   SourceInfoList sources_for_previous_run_;
358 
359   // The preferences-service used to store persistent state about sources.
360   raw_ptr<PrefService> pref_service_;
361 
362   SEQUENCE_CHECKER(sequence_checker_);
363   base::WeakPtrFactory<FileMetricsProvider> weak_factory_{this};
364 };
365 
366 }  // namespace metrics
367 
368 #endif  // COMPONENTS_METRICS_FILE_METRICS_PROVIDER_H_
369