xref: /aosp_15_r20/external/cronet/components/metrics/file_metrics_provider.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2016 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/metrics/file_metrics_provider.h"
6 
7 #include <stddef.h>
8 
9 #include <memory>
10 #include <vector>
11 
12 #include "base/command_line.h"
13 #include "base/containers/flat_map.h"
14 #include "base/debug/crash_logging.h"
15 #include "base/feature_list.h"
16 #include "base/files/file.h"
17 #include "base/files/file_enumerator.h"
18 #include "base/files/file_util.h"
19 #include "base/files/memory_mapped_file.h"
20 #include "base/functional/bind.h"
21 #include "base/logging.h"
22 #include "base/metrics/histogram_base.h"
23 #include "base/metrics/histogram_functions.h"
24 #include "base/metrics/histogram_macros.h"
25 #include "base/metrics/persistent_histogram_allocator.h"
26 #include "base/metrics/persistent_memory_allocator.h"
27 #include "base/metrics/ranges_manager.h"
28 #include "base/strings/string_piece.h"
29 #include "base/strings/stringprintf.h"
30 #include "base/task/task_traits.h"
31 #include "base/task/thread_pool.h"
32 #include "base/time/time.h"
33 #include "components/metrics/metrics_features.h"
34 #include "components/metrics/metrics_log.h"
35 #include "components/metrics/metrics_pref_names.h"
36 #include "components/metrics/metrics_service.h"
37 #include "components/metrics/persistent_histograms.h"
38 #include "components/metrics/persistent_system_profile.h"
39 #include "components/prefs/pref_registry_simple.h"
40 #include "components/prefs/pref_service.h"
41 #include "components/prefs/scoped_user_pref_update.h"
42 
43 namespace metrics {
44 namespace {
45 
46 // These structures provide values used to define how files are opened and
47 // accessed. It obviates the need for multiple code-paths within several of
48 // the methods.
49 struct SourceOptions {
50   // The flags to be used to open a file on disk.
51   int file_open_flags;
52 
53   // The access mode to be used when mapping a file into memory.
54   base::MemoryMappedFile::Access memory_mapped_access;
55 
56   // Indicates if the file is to be accessed read-only.
57   bool is_read_only;
58 };
59 
60 // Opening a file typically requires at least these flags.
61 constexpr int STD_OPEN = base::File::FLAG_OPEN | base::File::FLAG_READ;
62 
63 constexpr SourceOptions kSourceOptions[] = {
64     // SOURCE_HISTOGRAMS_ATOMIC_FILE
65     {
66         // Ensure that no other process reads this at the same time.
67         STD_OPEN | base::File::FLAG_WIN_EXCLUSIVE_READ,
68         base::MemoryMappedFile::READ_ONLY,
69         true,
70     },
71     // SOURCE_HISTOGRAMS_ATOMIC_DIR
72     {
73         // Ensure that no other process reads this at the same time.
74         STD_OPEN | base::File::FLAG_WIN_EXCLUSIVE_READ,
75         base::MemoryMappedFile::READ_ONLY,
76         true,
77     },
78     // SOURCE_HISTOGRAMS_ACTIVE_FILE
79     {
80         // Allow writing to the file. This is needed so we can keep track of
81         // deltas that have been uploaded (by modifying the file), while the
82         // file may still be open by an external process (e.g. Crashpad).
83         STD_OPEN | base::File::FLAG_WRITE,
84         base::MemoryMappedFile::READ_WRITE,
85         false,
86     },
87 };
88 
DeleteFileWhenPossible(const base::FilePath & path)89 void DeleteFileWhenPossible(const base::FilePath& path) {
90   // Open (with delete) and then immediately close the file by going out of
91   // scope. This is the only cross-platform safe way to delete a file that may
92   // be open elsewhere, a distinct possibility given the asynchronous nature
93   // of the delete task.
94   base::File file(path, base::File::FLAG_OPEN | base::File::FLAG_READ |
95                             base::File::FLAG_DELETE_ON_CLOSE);
96 }
97 
98 }  // namespace
99 
100 // This structure stores all the information about the sources being monitored
101 // and their current reporting state.
102 struct FileMetricsProvider::SourceInfo {
SourceInfometrics::FileMetricsProvider::SourceInfo103   explicit SourceInfo(const Params& params)
104       : type(params.type),
105         association(params.association),
106         prefs_key(params.prefs_key),
107         filter(params.filter),
108         max_age(params.max_age),
109         max_dir_kib(params.max_dir_kib),
110         max_dir_files(params.max_dir_files) {
111     switch (type) {
112       case SOURCE_HISTOGRAMS_ACTIVE_FILE:
113         DCHECK(prefs_key.empty());
114         [[fallthrough]];
115       case SOURCE_HISTOGRAMS_ATOMIC_FILE:
116         path = params.path;
117         break;
118       case SOURCE_HISTOGRAMS_ATOMIC_DIR:
119         directory = params.path;
120         break;
121     }
122   }
123 
124   SourceInfo(const SourceInfo&) = delete;
125   SourceInfo& operator=(const SourceInfo&) = delete;
126 
~SourceInfometrics::FileMetricsProvider::SourceInfo127   ~SourceInfo() {}
128 
129   struct FoundFile {
130     base::FilePath path;
131     base::FileEnumerator::FileInfo info;
132   };
133   using FoundFiles = base::flat_map<base::Time, FoundFile>;
134 
135   // How to access this source (file/dir, atomic/active).
136   const SourceType type;
137 
138   // With what run this source is associated.
139   const SourceAssociation association;
140 
141   // Where on disk the directory is located. This will only be populated when
142   // a directory is being monitored.
143   base::FilePath directory;
144 
145   // The files found in the above directory, ordered by last-modified.
146   std::unique_ptr<FoundFiles> found_files;
147 
148   // Where on disk the file is located. If a directory is being monitored,
149   // this will be updated for whatever file is being read.
150   base::FilePath path;
151 
152   // Name used inside prefs to persistent metadata.
153   std::string prefs_key;
154 
155   // The filter callback for determining what to do with found files.
156   FilterCallback filter;
157 
158   // The maximum allowed age of a file.
159   base::TimeDelta max_age;
160 
161   // The maximum allowed bytes in a directory.
162   size_t max_dir_kib;
163 
164   // The maximum allowed files in a directory.
165   size_t max_dir_files;
166 
167   // The last-seen time of this source to detect change.
168   base::Time last_seen;
169 
170   // Indicates if the data has been read out or not.
171   bool read_complete = false;
172 
173   // Once a file has been recognized as needing to be read, it is mapped
174   // into memory and assigned to an |allocator| object.
175   std::unique_ptr<base::PersistentHistogramAllocator> allocator;
176 };
177 
Params(const base::FilePath & path,SourceType type,SourceAssociation association,base::StringPiece prefs_key)178 FileMetricsProvider::Params::Params(const base::FilePath& path,
179                                     SourceType type,
180                                     SourceAssociation association,
181                                     base::StringPiece prefs_key)
182     : path(path), type(type), association(association), prefs_key(prefs_key) {}
183 
184 FileMetricsProvider::Params::~Params() = default;
185 
FileMetricsProvider(PrefService * local_state)186 FileMetricsProvider::FileMetricsProvider(PrefService* local_state)
187     : pref_service_(local_state) {
188   base::StatisticsRecorder::RegisterHistogramProvider(
189       weak_factory_.GetWeakPtr());
190 }
191 
192 FileMetricsProvider::~FileMetricsProvider() = default;
193 
RegisterSource(const Params & params)194 void FileMetricsProvider::RegisterSource(const Params& params) {
195   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
196 
197   // Ensure that kSourceOptions has been filled for this type.
198   DCHECK_GT(std::size(kSourceOptions), static_cast<size_t>(params.type));
199 
200   std::unique_ptr<SourceInfo> source(new SourceInfo(params));
201 
202   // |prefs_key| may be empty if the caller does not wish to persist the
203   // state across instances of the program.
204   if (pref_service_ && !params.prefs_key.empty()) {
205     source->last_seen = pref_service_->GetTime(
206         metrics::prefs::kMetricsLastSeenPrefix + source->prefs_key);
207   }
208 
209   switch (params.association) {
210     case ASSOCIATE_CURRENT_RUN:
211     case ASSOCIATE_INTERNAL_PROFILE:
212     case ASSOCIATE_INTERNAL_PROFILE_SAMPLES_COUNTER:
213       sources_to_check_.push_back(std::move(source));
214       break;
215     case ASSOCIATE_PREVIOUS_RUN:
216     case ASSOCIATE_INTERNAL_PROFILE_OR_PREVIOUS_RUN:
217       DCHECK_EQ(SOURCE_HISTOGRAMS_ATOMIC_FILE, source->type);
218       sources_for_previous_run_.push_back(std::move(source));
219       break;
220   }
221 }
222 
223 // static
RegisterSourcePrefs(PrefRegistrySimple * prefs,const base::StringPiece prefs_key)224 void FileMetricsProvider::RegisterSourcePrefs(
225     PrefRegistrySimple* prefs,
226     const base::StringPiece prefs_key) {
227   prefs->RegisterInt64Pref(
228       metrics::prefs::kMetricsLastSeenPrefix + std::string(prefs_key), 0);
229 }
230 
231 //  static
RegisterPrefs(PrefRegistrySimple * prefs)232 void FileMetricsProvider::RegisterPrefs(PrefRegistrySimple* prefs) {
233   prefs->RegisterListPref(metrics::prefs::kMetricsFileMetricsMetadata);
234 }
235 
236 // static
RecordAccessResult(AccessResult result)237 void FileMetricsProvider::RecordAccessResult(AccessResult result) {
238   UMA_HISTOGRAM_ENUMERATION("UMA.FileMetricsProvider.AccessResult", result,
239                             ACCESS_RESULT_MAX);
240 }
241 
242 // static
LocateNextFileInDirectory(SourceInfo * source)243 bool FileMetricsProvider::LocateNextFileInDirectory(SourceInfo* source) {
244   DCHECK_EQ(SOURCE_HISTOGRAMS_ATOMIC_DIR, source->type);
245   DCHECK(!source->directory.empty());
246 
247   // Cumulative directory stats. These will remain zero if the directory isn't
248   // scanned but that's okay since any work they would cause to be done below
249   // would have been done during the first call where the directory was fully
250   // scanned.
251   size_t total_size_kib = 0;  // Using KiB allows 4TiB even on 32-bit builds.
252   size_t file_count = 0;
253 
254   base::Time now_time = base::Time::Now();
255   if (!source->found_files) {
256     source->found_files = std::make_unique<SourceInfo::FoundFiles>();
257     base::FileEnumerator file_iter(source->directory, /*recursive=*/false,
258                                    base::FileEnumerator::FILES);
259     SourceInfo::FoundFile found_file;
260 
261     // Open the directory and find all the files, remembering the last-modified
262     // time of each.
263     for (found_file.path = file_iter.Next(); !found_file.path.empty();
264          found_file.path = file_iter.Next()) {
265       found_file.info = file_iter.GetInfo();
266 
267       // Ignore directories.
268       if (found_file.info.IsDirectory())
269         continue;
270 
271       // Ignore temporary files.
272       base::FilePath::CharType first_character =
273           found_file.path.BaseName().value().front();
274       if (first_character == FILE_PATH_LITERAL('.') ||
275           first_character == FILE_PATH_LITERAL('_')) {
276         continue;
277       }
278 
279       // Ignore non-PMA (Persistent Memory Allocator) files.
280       if (found_file.path.Extension() !=
281           base::PersistentMemoryAllocator::kFileExtension) {
282         continue;
283       }
284 
285       // Process real files.
286       total_size_kib += found_file.info.GetSize() >> 10;
287       base::Time modified = found_file.info.GetLastModifiedTime();
288       if (modified > source->last_seen) {
289         // This file hasn't been read. Remember it (unless from the future).
290         if (modified <= now_time)
291           source->found_files->emplace(modified, std::move(found_file));
292         ++file_count;
293       } else {
294         // This file has been read. Try to delete it. Ignore any errors because
295         // the file may be un-removeable by this process. It could, for example,
296         // have been created by a privileged process like setup.exe. Even if it
297         // is not removed, it will continue to be ignored bacuse of the older
298         // modification time.
299         base::DeleteFile(found_file.path);
300       }
301     }
302   }
303 
304   // Filter files from the front until one is found for processing.
305   bool have_file = false;
306   while (!source->found_files->empty()) {
307     SourceInfo::FoundFile found =
308         std::move(source->found_files->begin()->second);
309     source->found_files->erase(source->found_files->begin());
310 
311     bool too_many =
312         source->max_dir_files > 0 && file_count > source->max_dir_files;
313     bool too_big =
314         source->max_dir_kib > 0 && total_size_kib > source->max_dir_kib;
315     bool too_old =
316         source->max_age != base::TimeDelta() &&
317         now_time - found.info.GetLastModifiedTime() > source->max_age;
318     if (too_many || too_big || too_old) {
319       base::DeleteFile(found.path);
320       --file_count;
321       total_size_kib -= found.info.GetSize() >> 10;
322       RecordAccessResult(too_many ? ACCESS_RESULT_TOO_MANY_FILES
323                                   : too_big ? ACCESS_RESULT_TOO_MANY_BYTES
324                                             : ACCESS_RESULT_TOO_OLD);
325       continue;
326     }
327 
328     AccessResult result = HandleFilterSource(source, found.path);
329     if (result == ACCESS_RESULT_SUCCESS) {
330       source->path = std::move(found.path);
331       have_file = true;
332       break;
333     }
334 
335     // Record the result. Success will be recorded by the caller.
336     if (result != ACCESS_RESULT_THIS_PID)
337       RecordAccessResult(result);
338   }
339 
340   return have_file;
341 }
342 
343 // static
FinishedWithSource(SourceInfo * source,AccessResult result)344 void FileMetricsProvider::FinishedWithSource(SourceInfo* source,
345                                              AccessResult result) {
346   // Different source types require different post-processing.
347   switch (source->type) {
348     case SOURCE_HISTOGRAMS_ATOMIC_FILE:
349     case SOURCE_HISTOGRAMS_ATOMIC_DIR:
350       // Done with this file so delete the allocator and its owned file.
351       source->allocator.reset();
352       // Remove the file if has been recorded. This prevents them from
353       // accumulating or also being recorded by different instances of
354       // the browser.
355       if (result == ACCESS_RESULT_SUCCESS ||
356           result == ACCESS_RESULT_NOT_MODIFIED ||
357           result == ACCESS_RESULT_MEMORY_DELETED ||
358           result == ACCESS_RESULT_TOO_OLD) {
359         DeleteFileWhenPossible(source->path);
360       }
361       break;
362     case SOURCE_HISTOGRAMS_ACTIVE_FILE:
363       // Keep the allocator open so it doesn't have to be re-mapped each
364       // time. This also allows the contents to be merged on-demand.
365       break;
366   }
367 }
368 
369 // static
CheckAndMergeMetricSourcesOnTaskRunner(SourceInfoList * sources)370 std::vector<size_t> FileMetricsProvider::CheckAndMergeMetricSourcesOnTaskRunner(
371     SourceInfoList* sources) {
372   // This method has all state information passed in |sources| and is intended
373   // to run on a worker thread rather than the UI thread.
374   std::vector<size_t> samples_counts;
375 
376   for (std::unique_ptr<SourceInfo>& source : *sources) {
377     AccessResult result;
378     do {
379       result = CheckAndMapMetricSource(source.get());
380 
381       // Some results are not reported in order to keep the dashboard clean.
382       if (result != ACCESS_RESULT_DOESNT_EXIST &&
383           result != ACCESS_RESULT_NOT_MODIFIED &&
384           result != ACCESS_RESULT_THIS_PID) {
385         RecordAccessResult(result);
386       }
387 
388       // If there are no files (or no more files) in this source, stop now.
389       if (result == ACCESS_RESULT_DOESNT_EXIST)
390         break;
391 
392       // Mapping was successful. Merge it.
393       if (result == ACCESS_RESULT_SUCCESS) {
394         // Metrics associated with internal profiles have to be fetched directly
395         // so just keep the mapping for use by the main thread.
396         if (source->association == ASSOCIATE_INTERNAL_PROFILE)
397           break;
398 
399         if (source->association == ASSOCIATE_INTERNAL_PROFILE_SAMPLES_COUNTER) {
400           samples_counts.push_back(CollectFileMetadataFromSource(source.get()));
401         } else {
402           size_t histograms_count =
403               MergeHistogramDeltasFromSource(source.get());
404           if (!source->prefs_key.empty()) {
405             base::UmaHistogramCounts1000(
406                 base::StringPrintf(
407                     "UMA.FileMetricsProvider.%s.MergedHistogramsCount",
408                     source->prefs_key.c_str()),
409                 histograms_count);
410           }
411         }
412         DCHECK(source->read_complete);
413       }
414 
415       // All done with this source.
416       FinishedWithSource(source.get(), result);
417 
418       // If it's a directory, keep trying until a file is successfully opened.
419       // When there are no more files, ACCESS_RESULT_DOESNT_EXIST will be
420       // returned and the loop will exit above.
421     } while (result != ACCESS_RESULT_SUCCESS && !source->directory.empty());
422 
423     // If the set of known files is empty, clear the object so the next run
424     // will do a fresh scan of the directory.
425     if (source->found_files && source->found_files->empty())
426       source->found_files.reset();
427   }
428 
429   return samples_counts;
430 }
431 
432 // This method has all state information passed in |source| and is intended
433 // to run on a worker thread rather than the UI thread.
434 // static
CheckAndMapMetricSource(SourceInfo * source)435 FileMetricsProvider::AccessResult FileMetricsProvider::CheckAndMapMetricSource(
436     SourceInfo* source) {
437   // If source was read, clean up after it.
438   if (source->read_complete)
439     FinishedWithSource(source, ACCESS_RESULT_SUCCESS);
440   source->read_complete = false;
441   DCHECK(!source->allocator);
442 
443   // If the source is a directory, look for files within it.
444   if (!source->directory.empty() && !LocateNextFileInDirectory(source))
445     return ACCESS_RESULT_DOESNT_EXIST;
446 
447   // Do basic validation on the file metadata.
448   base::File::Info info;
449   if (!base::GetFileInfo(source->path, &info))
450     return ACCESS_RESULT_DOESNT_EXIST;
451 
452   if (info.is_directory || info.size == 0)
453     return ACCESS_RESULT_INVALID_FILE;
454 
455   if (source->last_seen >= info.last_modified)
456     return ACCESS_RESULT_NOT_MODIFIED;
457   if (source->max_age != base::TimeDelta() &&
458       base::Time::Now() - info.last_modified > source->max_age) {
459     return ACCESS_RESULT_TOO_OLD;
460   }
461 
462   // Non-directory files still need to be filtered.
463   if (source->directory.empty()) {
464     AccessResult result = HandleFilterSource(source, source->path);
465     if (result != ACCESS_RESULT_SUCCESS)
466       return result;
467   }
468 
469   // A new file of metrics has been found.
470   base::File file(source->path, kSourceOptions[source->type].file_open_flags);
471   if (!file.IsValid())
472     return ACCESS_RESULT_NO_OPEN;
473 
474   // Check that file is writable if that is expected. If a write is attempted
475   // on an unwritable memory-mapped file, a SIGBUS will cause a crash.
476   const bool read_only = kSourceOptions[source->type].is_read_only;
477   if (!read_only) {
478     constexpr int kTestSize = 16;
479     char header[kTestSize];
480     int amount = file.Read(0, header, kTestSize);
481     if (amount != kTestSize)
482       return ACCESS_RESULT_INVALID_CONTENTS;
483 
484     char zeros[kTestSize] = {0};
485     file.Write(0, zeros, kTestSize);
486     file.Flush();
487 
488     // A crash here would be unfortunate as the file would be left invalid
489     // and skipped/deleted by later attempts. This is unlikely, however, and
490     // the benefit of avoiding crashes from mapping as read/write a file that
491     // can't be written more than justifies the risk.
492 
493     char check[kTestSize];
494     amount = file.Read(0, check, kTestSize);
495     if (amount != kTestSize)
496       return ACCESS_RESULT_INVALID_CONTENTS;
497     if (memcmp(check, zeros, kTestSize) != 0)
498       return ACCESS_RESULT_NOT_WRITABLE;
499 
500     file.Write(0, header, kTestSize);
501     file.Flush();
502     amount = file.Read(0, check, kTestSize);
503     if (amount != kTestSize)
504       return ACCESS_RESULT_INVALID_CONTENTS;
505     if (memcmp(check, header, kTestSize) != 0)
506       return ACCESS_RESULT_NOT_WRITABLE;
507   }
508 
509   std::unique_ptr<base::MemoryMappedFile> mapped(new base::MemoryMappedFile());
510   if (!mapped->Initialize(std::move(file),
511                           kSourceOptions[source->type].memory_mapped_access)) {
512     return ACCESS_RESULT_SYSTEM_MAP_FAILURE;
513   }
514 
515   // Ensure any problems below don't occur repeatedly.
516   source->last_seen = info.last_modified;
517 
518   // Test the validity of the file contents.
519   if (!base::FilePersistentMemoryAllocator::IsFileAcceptable(*mapped,
520                                                              read_only)) {
521     return ACCESS_RESULT_INVALID_CONTENTS;
522   }
523 
524   // Map the file and validate it.
525   std::unique_ptr<base::FilePersistentMemoryAllocator> memory_allocator =
526       std::make_unique<base::FilePersistentMemoryAllocator>(
527           std::move(mapped), 0, 0, base::StringPiece(),
528           read_only ? base::FilePersistentMemoryAllocator::kReadOnly
529                     : base::FilePersistentMemoryAllocator::kReadWriteExisting);
530   if (memory_allocator->GetMemoryState() ==
531       base::PersistentMemoryAllocator::MEMORY_DELETED) {
532     return ACCESS_RESULT_MEMORY_DELETED;
533   }
534   if (memory_allocator->IsCorrupt())
535     return ACCESS_RESULT_DATA_CORRUPTION;
536 
537   // Cache the file data while running in a background thread so that there
538   // shouldn't be any I/O when the data is accessed from the main thread.
539   // Files with an internal profile, those from previous runs that include
540   // a full system profile and are fetched via ProvideIndependentMetrics(),
541   // are loaded on a background task and so there's no need to cache the
542   // data in advance.
543   if (source->association != ASSOCIATE_INTERNAL_PROFILE)
544     memory_allocator->Cache();
545 
546   // Create an allocator for the mapped file. Ownership passes to the allocator.
547   source->allocator = std::make_unique<base::PersistentHistogramAllocator>(
548       std::move(memory_allocator));
549   // Pass a custom RangesManager so that we do not register the BucketRanges
550   // with the global StatisticsRecorder when creating histogram objects using
551   // the allocator's underlying data. This avoids unnecessary contention on the
552   // global StatisticsRecorder lock.
553   // Note: Since RangesManager is not thread safe, this means that |allocator|
554   // must be iterated over one thread at a time (i.e., not concurrently). This
555   // is the case.
556   source->allocator->SetRangesManager(new base::RangesManager());
557 
558   // Check that an "independent" file has the necessary information present.
559   if (source->association == ASSOCIATE_INTERNAL_PROFILE &&
560       !PersistentSystemProfile::GetSystemProfile(
561           *source->allocator->memory_allocator(), nullptr)) {
562     return ACCESS_RESULT_NO_PROFILE;
563   }
564 
565   return ACCESS_RESULT_SUCCESS;
566 }
567 
568 // static
MergeHistogramDeltasFromSource(SourceInfo * source)569 size_t FileMetricsProvider::MergeHistogramDeltasFromSource(SourceInfo* source) {
570   DCHECK(source->allocator);
571   base::PersistentHistogramAllocator::Iterator histogram_iter(
572       source->allocator.get());
573 
574   const bool read_only = kSourceOptions[source->type].is_read_only;
575   size_t histogram_count = 0;
576   while (true) {
577     std::unique_ptr<base::HistogramBase> histogram = histogram_iter.GetNext();
578     if (!histogram)
579       break;
580 
581     if (read_only) {
582       source->allocator->MergeHistogramFinalDeltaToStatisticsRecorder(
583           histogram.get());
584     } else {
585       source->allocator->MergeHistogramDeltaToStatisticsRecorder(
586           histogram.get());
587     }
588     ++histogram_count;
589   }
590 
591   source->read_complete = true;
592   DVLOG(1) << "Reported " << histogram_count << " histograms from "
593            << source->path.value();
594   return histogram_count;
595 }
596 
597 // static
RecordHistogramSnapshotsFromSource(base::HistogramSnapshotManager * snapshot_manager,SourceInfo * source,base::HistogramBase::Flags required_flags)598 void FileMetricsProvider::RecordHistogramSnapshotsFromSource(
599     base::HistogramSnapshotManager* snapshot_manager,
600     SourceInfo* source,
601     base::HistogramBase::Flags required_flags) {
602   DCHECK_NE(SOURCE_HISTOGRAMS_ACTIVE_FILE, source->type);
603 
604   base::PersistentHistogramAllocator::Iterator histogram_iter(
605       source->allocator.get());
606 
607   int histogram_count = 0;
608   while (true) {
609     std::unique_ptr<base::HistogramBase> histogram = histogram_iter.GetNext();
610     if (!histogram)
611       break;
612     if (histogram->HasFlags(required_flags)) {
613       snapshot_manager->PrepareFinalDelta(histogram.get());
614       ++histogram_count;
615     }
616   }
617 
618   source->read_complete = true;
619   DVLOG(1) << "Reported " << histogram_count << " histograms from "
620            << source->path.value();
621 }
622 
HandleFilterSource(SourceInfo * source,const base::FilePath & path)623 FileMetricsProvider::AccessResult FileMetricsProvider::HandleFilterSource(
624     SourceInfo* source,
625     const base::FilePath& path) {
626   if (!source->filter)
627     return ACCESS_RESULT_SUCCESS;
628 
629   // Alternatively, pass a Params object to the filter like what was originally
630   // used to configure the source.
631   // Params params(path, source->type, source->association, source->prefs_key);
632   FilterAction action = source->filter.Run(path);
633   switch (action) {
634     case FILTER_PROCESS_FILE:
635       // Process the file.
636       return ACCESS_RESULT_SUCCESS;
637 
638     case FILTER_ACTIVE_THIS_PID:
639     // Even the file for the current process has to be touched or its stamp
640     // will be less than "last processed" and thus skipped on future runs,
641     // even those done by new instances of the browser if a pref key is
642     // provided so that the last-uploaded stamp is recorded.
643     case FILTER_TRY_LATER: {
644       // Touch the file with the current timestamp making it (presumably) the
645       // newest file in the directory.
646       base::Time now = base::Time::Now();
647       base::TouchFile(path, /*accessed=*/now, /*modified=*/now);
648       if (action == FILTER_ACTIVE_THIS_PID)
649         return ACCESS_RESULT_THIS_PID;
650       return ACCESS_RESULT_FILTER_TRY_LATER;
651     }
652 
653     case FILTER_SKIP_FILE:
654       switch (source->type) {
655         case SOURCE_HISTOGRAMS_ATOMIC_FILE:
656         case SOURCE_HISTOGRAMS_ATOMIC_DIR:
657           // Only "atomic" files are deleted (best-effort).
658           DeleteFileWhenPossible(path);
659           break;
660         case SOURCE_HISTOGRAMS_ACTIVE_FILE:
661           // File will presumably get modified elsewhere and thus tried again.
662           break;
663       }
664       return ACCESS_RESULT_FILTER_SKIP_FILE;
665   }
666 
667   // Code never gets here but some compilers don't realize that and so complain
668   // that "not all control paths return a value".
669   NOTREACHED();
670   return ACCESS_RESULT_SUCCESS;
671 }
672 
673 /* static */
ProvideIndependentMetricsOnTaskRunner(SourceInfo * source,ChromeUserMetricsExtension * uma_proto,base::HistogramSnapshotManager * snapshot_manager,base::OnceClosure serialize_log_callback)674 bool FileMetricsProvider::ProvideIndependentMetricsOnTaskRunner(
675     SourceInfo* source,
676     ChromeUserMetricsExtension* uma_proto,
677     base::HistogramSnapshotManager* snapshot_manager,
678     base::OnceClosure serialize_log_callback) {
679   // Include various crash keys about the file/allocator being read so that if
680   // there is ever a crash report being dumped while reading its contents, we
681   // have some info about its state.
682   // TODO(crbug.com/1432981): Clean this up.
683 
684   // Useful to know the metadata version of the source (e.g. to know if some
685   // fields like memory_state below are up to date).
686   SCOPED_CRASH_KEY_NUMBER("PMA", "version",
687                           source->allocator->memory_allocator()->version());
688   // Useful to know whether the source comes from a crashed session.
689   SCOPED_CRASH_KEY_NUMBER(
690       "PMA", "memory_state",
691       source->allocator->memory_allocator()->GetMemoryState());
692   // Useful to know the freeptr as it can help determine if the source comes
693   // from a session that crashed due to failing to allocate an object across
694   // different pages.
695   SCOPED_CRASH_KEY_NUMBER("PMA", "freeptr",
696                           source->allocator->memory_allocator()->freeptr());
697   SCOPED_CRASH_KEY_BOOL("PMA", "full",
698                         source->allocator->memory_allocator()->IsFull());
699   SCOPED_CRASH_KEY_BOOL("PMA", "corrupt",
700                         source->allocator->memory_allocator()->IsCorrupt());
701 
702   SystemProfileProto* system_profile_proto =
703       uma_proto->mutable_system_profile();
704 
705   if (PersistentSystemProfile::GetSystemProfile(
706           *source->allocator->memory_allocator(), system_profile_proto)) {
707     system_profile_proto->mutable_stability()->set_from_previous_run(true);
708     RecordHistogramSnapshotsFromSource(
709         snapshot_manager, source,
710         /*required_flags=*/base::HistogramBase::kUmaTargetedHistogramFlag);
711 
712     // NOTE: If you are adding anything here, consider also changing
713     // MetricsStateMetricsProvider::ProvidePreviousSessionData().
714 
715     // Use the client UUID stored in the system profile (if there is one) as the
716     // independent log's client ID. Usually, this has no effect, but there are
717     // scenarios where the log may have come from a session that had a different
718     // client ID than the one currently in use (e.g., client ID was reset due to
719     // being detected as a cloned install), so make sure to associate it with
720     // the proper one.
721     const std::string& client_uuid = system_profile_proto->client_uuid();
722     if (!client_uuid.empty()) {
723       uma_proto->set_client_id(MetricsLog::Hash(client_uuid));
724     }
725 
726     // Serialize the log while we are still in the background, instead of on the
727     // callback that runs on the main thread.
728     std::move(serialize_log_callback).Run();
729 
730     return true;
731   }
732 
733   return false;
734 }
735 
AppendToSamplesCountPref(std::vector<size_t> samples_counts)736 void FileMetricsProvider::AppendToSamplesCountPref(
737     std::vector<size_t> samples_counts) {
738   ScopedListPrefUpdate update(pref_service_,
739                               metrics::prefs::kMetricsFileMetricsMetadata);
740   for (size_t samples_count : samples_counts) {
741     update->Append(static_cast<int>(samples_count));
742   }
743 }
744 
745 // static
CollectFileMetadataFromSource(SourceInfo * source)746 size_t FileMetricsProvider::CollectFileMetadataFromSource(SourceInfo* source) {
747   base::HistogramBase::Count samples_count = 0;
748   base::PersistentHistogramAllocator::Iterator it{source->allocator.get()};
749   std::unique_ptr<base::HistogramBase> histogram;
750   while ((histogram = it.GetNext()) != nullptr) {
751     samples_count += histogram->SnapshotFinalDelta()->TotalCount();
752   }
753   source->read_complete = true;
754   return samples_count;
755 }
756 
ScheduleSourcesCheck()757 void FileMetricsProvider::ScheduleSourcesCheck() {
758   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
759 
760   if (sources_to_check_.empty())
761     return;
762 
763   // Create an independent list of sources for checking. This will be Owned()
764   // by the reply call given to the task-runner, to be deleted when that call
765   // has returned. It is also passed Unretained() to the task itself, safe
766   // because that must complete before the reply runs.
767   SourceInfoList* check_list = new SourceInfoList();
768   std::swap(sources_to_check_, *check_list);
769   base::ThreadPool::PostTaskAndReplyWithResult(
770       FROM_HERE,
771       {base::MayBlock(), base::TaskPriority::BEST_EFFORT,
772        // SKIP_ON_SHUTDOWN because the task must be run to completion once
773        // started. Since the task may merge metrics from files on disk, the task
774        // should be completed so that those files are deleted (to prevent
775        // re-merging them in another session, which would cause duplication).
776        base::TaskShutdownBehavior::SKIP_ON_SHUTDOWN},
777       base::BindOnce(
778           &FileMetricsProvider::CheckAndMergeMetricSourcesOnTaskRunner,
779           base::Unretained(check_list)),
780       base::BindOnce(&FileMetricsProvider::RecordSourcesChecked,
781                      weak_factory_.GetWeakPtr(), base::Owned(check_list)));
782 }
783 
RecordSourcesChecked(SourceInfoList * checked,std::vector<size_t> samples_counts)784 void FileMetricsProvider::RecordSourcesChecked(
785     SourceInfoList* checked,
786     std::vector<size_t> samples_counts) {
787   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
788 
789   AppendToSamplesCountPref(std::move(samples_counts));
790 
791   // Sources that still have an allocator at this point are read/write "active"
792   // files that may need their contents merged on-demand. If there is no
793   // allocator (not a read/write file) but a read was done on the task-runner,
794   // try again immediately to see if more is available (in a directory of
795   // files). Otherwise, remember the source for checking again at a later time.
796   bool did_read = false;
797   for (auto iter = checked->begin(); iter != checked->end();) {
798     auto temp = iter++;
799     SourceInfo* source = temp->get();
800     if (source->read_complete) {
801       RecordSourceAsRead(source);
802       did_read = true;
803     }
804     if (source->allocator) {
805       if (source->association == ASSOCIATE_INTERNAL_PROFILE) {
806         sources_with_profile_.splice(sources_with_profile_.end(), *checked,
807                                      temp);
808       } else {
809         sources_mapped_.splice(sources_mapped_.end(), *checked, temp);
810       }
811     } else {
812       sources_to_check_.splice(sources_to_check_.end(), *checked, temp);
813     }
814   }
815 
816   // If a read was done, schedule another one immediately. In the case of a
817   // directory of files, this ensures that all entries get processed. It's
818   // done here instead of as a loop in CheckAndMergeMetricSourcesOnTaskRunner
819   // so that (a) it gives the disk a rest and (b) testing of individual reads
820   // is possible.
821   if (did_read)
822     ScheduleSourcesCheck();
823 }
824 
DeleteFileAsync(const base::FilePath & path)825 void FileMetricsProvider::DeleteFileAsync(const base::FilePath& path) {
826   base::ThreadPool::PostTask(
827       FROM_HERE,
828       {base::MayBlock(), base::TaskPriority::BEST_EFFORT,
829        // CONTINUE_ON_SHUTDOWN because files that are scheduled to be deleted
830        // asynchronously are not guaranteed to be deleted this session anyway,
831        // so no need to block shutdown if the task has already started running.
832        // Further, for such files, there are different ways to ensure they won't
833        // be consumed again (i.e., prefs).
834        base::TaskShutdownBehavior::CONTINUE_ON_SHUTDOWN},
835       base::BindOnce(DeleteFileWhenPossible, path));
836 }
837 
RecordSourceAsRead(SourceInfo * source)838 void FileMetricsProvider::RecordSourceAsRead(SourceInfo* source) {
839   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
840 
841   // Persistently record the "last seen" timestamp of the source file to
842   // ensure that the file is never read again unless it is modified again.
843   if (pref_service_ && !source->prefs_key.empty()) {
844     pref_service_->SetTime(
845         metrics::prefs::kMetricsLastSeenPrefix + source->prefs_key,
846         source->last_seen);
847   }
848 }
849 
OnDidCreateMetricsLog()850 void FileMetricsProvider::OnDidCreateMetricsLog() {
851   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
852 
853   // Schedule a check to see if there are new metrics to load. If so, they will
854   // be reported during the next collection run after this one. The check is run
855   // off of a MayBlock() TaskRunner so as to not cause delays on the main UI
856   // thread (which is currently where metric collection is done).
857   ScheduleSourcesCheck();
858 
859   // Clear any data for initial metrics since they're always reported
860   // before the first call to this method. It couldn't be released after
861   // being reported in RecordInitialHistogramSnapshots because the data
862   // will continue to be used by the caller after that method returns. Once
863   // here, though, all actions to be done on the data have been completed.
864   for (const std::unique_ptr<SourceInfo>& source : sources_for_previous_run_)
865     DeleteFileAsync(source->path);
866   sources_for_previous_run_.clear();
867 }
868 
HasIndependentMetrics()869 bool FileMetricsProvider::HasIndependentMetrics() {
870   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
871   return !sources_with_profile_.empty() || SimulateIndependentMetrics();
872 }
873 
ProvideIndependentMetrics(base::OnceClosure serialize_log_callback,base::OnceCallback<void (bool)> done_callback,ChromeUserMetricsExtension * uma_proto,base::HistogramSnapshotManager * snapshot_manager)874 void FileMetricsProvider::ProvideIndependentMetrics(
875     base::OnceClosure serialize_log_callback,
876     base::OnceCallback<void(bool)> done_callback,
877     ChromeUserMetricsExtension* uma_proto,
878     base::HistogramSnapshotManager* snapshot_manager) {
879   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
880 
881   if (sources_with_profile_.empty()) {
882     std::move(done_callback).Run(false);
883     return;
884   }
885 
886   std::unique_ptr<SourceInfo> source =
887       std::move(*sources_with_profile_.begin());
888   sources_with_profile_.pop_front();
889   SourceInfo* source_ptr = source.get();
890   DCHECK(source->allocator);
891 
892   // Do the actual work as a background task.
893   base::ThreadPool::PostTaskAndReplyWithResult(
894       FROM_HERE,
895       {base::MayBlock(), base::TaskPriority::BEST_EFFORT,
896        // CONTINUE_ON_SHUTDOWN because the work done is only useful once the
897        // reply task is run (and there are no side effects). So, no need to
898        // block shutdown since the reply task won't be run anyway.
899        base::TaskShutdownBehavior::CONTINUE_ON_SHUTDOWN},
900       base::BindOnce(
901           &FileMetricsProvider::ProvideIndependentMetricsOnTaskRunner,
902           source_ptr, uma_proto, snapshot_manager,
903           std::move(serialize_log_callback)),
904       base::BindOnce(&FileMetricsProvider::ProvideIndependentMetricsCleanup,
905                      weak_factory_.GetWeakPtr(), std::move(done_callback),
906                      std::move(source)));
907 }
908 
ProvideIndependentMetricsCleanup(base::OnceCallback<void (bool)> done_callback,std::unique_ptr<SourceInfo> source,bool success)909 void FileMetricsProvider::ProvideIndependentMetricsCleanup(
910     base::OnceCallback<void(bool)> done_callback,
911     std::unique_ptr<SourceInfo> source,
912     bool success) {
913   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
914 
915   // Regardless of whether this source was successfully recorded, it is
916   // never read again.
917   source->read_complete = true;
918   RecordSourceAsRead(source.get());
919   sources_to_check_.push_back(std::move(source));
920   ScheduleSourcesCheck();
921 
922   std::move(done_callback).Run(success);
923 }
924 
HasPreviousSessionData()925 bool FileMetricsProvider::HasPreviousSessionData() {
926   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
927 
928   // Check all sources for previous run to see if they need to be read.
929   for (auto iter = sources_for_previous_run_.begin();
930        iter != sources_for_previous_run_.end();) {
931     auto temp = iter++;
932     SourceInfo* source = temp->get();
933 
934     // This would normally be done on a background I/O thread but there
935     // hasn't been a chance to run any at the time this method is called.
936     // Do the check in-line.
937     AccessResult result = CheckAndMapMetricSource(source);
938     UMA_HISTOGRAM_ENUMERATION("UMA.FileMetricsProvider.InitialAccessResult",
939                               result, ACCESS_RESULT_MAX);
940 
941     // If it couldn't be accessed, remove it from the list. There is only ever
942     // one chance to record it so no point keeping it around for later. Also
943     // mark it as having been read since uploading it with a future browser
944     // run would associate it with the then-previous run which would no longer
945     // be the run from which it came.
946     if (result != ACCESS_RESULT_SUCCESS) {
947       DCHECK(!source->allocator);
948       RecordSourceAsRead(source);
949       DeleteFileAsync(source->path);
950       sources_for_previous_run_.erase(temp);
951       continue;
952     }
953 
954     DCHECK(source->allocator);
955 
956     // If the source should be associated with an existing internal profile,
957     // move it to |sources_with_profile_| for later upload.
958     if (source->association == ASSOCIATE_INTERNAL_PROFILE_OR_PREVIOUS_RUN) {
959       if (PersistentSystemProfile::HasSystemProfile(
960               *source->allocator->memory_allocator())) {
961         sources_with_profile_.splice(sources_with_profile_.end(),
962                                      sources_for_previous_run_, temp);
963       }
964     }
965   }
966 
967   return !sources_for_previous_run_.empty();
968 }
969 
RecordInitialHistogramSnapshots(base::HistogramSnapshotManager * snapshot_manager)970 void FileMetricsProvider::RecordInitialHistogramSnapshots(
971     base::HistogramSnapshotManager* snapshot_manager) {
972   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
973 
974   for (const std::unique_ptr<SourceInfo>& source : sources_for_previous_run_) {
975     // The source needs to have an allocator attached to it in order to read
976     // histograms out of it.
977     DCHECK(!source->read_complete);
978     DCHECK(source->allocator);
979 
980     // Dump all stability histograms contained within the source to the
981     // snapshot-manager.
982     RecordHistogramSnapshotsFromSource(
983         snapshot_manager, source.get(),
984         /*required_flags=*/base::HistogramBase::kUmaStabilityHistogramFlag);
985 
986     // Update the last-seen time so it isn't read again unless it changes.
987     RecordSourceAsRead(source.get());
988   }
989 }
990 
MergeHistogramDeltas(bool async,base::OnceClosure done_callback)991 void FileMetricsProvider::MergeHistogramDeltas(
992     bool async,
993     base::OnceClosure done_callback) {
994   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
995   // TODO(crbug.com/1293026): Consider if this work can be done asynchronously.
996   for (std::unique_ptr<SourceInfo>& source : sources_mapped_) {
997     MergeHistogramDeltasFromSource(source.get());
998   }
999   std::move(done_callback).Run();
1000 }
1001 
SimulateIndependentMetrics()1002 bool FileMetricsProvider::SimulateIndependentMetrics() {
1003   if (!pref_service_->HasPrefPath(
1004           metrics::prefs::kMetricsFileMetricsMetadata)) {
1005     return false;
1006   }
1007 
1008   ScopedListPrefUpdate list_pref(pref_service_,
1009                                  metrics::prefs::kMetricsFileMetricsMetadata);
1010   base::Value::List& list_value = list_pref.Get();
1011   if (list_value.empty())
1012     return false;
1013 
1014   size_t count = pref_service_->GetInteger(
1015       metrics::prefs::kStabilityFileMetricsUnsentSamplesCount);
1016   pref_service_->SetInteger(
1017       metrics::prefs::kStabilityFileMetricsUnsentSamplesCount,
1018       list_value[0].GetInt() + count);
1019   pref_service_->SetInteger(
1020       metrics::prefs::kStabilityFileMetricsUnsentFilesCount,
1021       list_value.size() - 1);
1022   list_value.erase(list_value.begin());
1023 
1024   return true;
1025 }
1026 
1027 }  // namespace metrics
1028