1 // Copyright 2016 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/metrics/file_metrics_provider.h"
6
7 #include <stddef.h>
8
9 #include <memory>
10 #include <vector>
11
12 #include "base/command_line.h"
13 #include "base/containers/flat_map.h"
14 #include "base/debug/crash_logging.h"
15 #include "base/feature_list.h"
16 #include "base/files/file.h"
17 #include "base/files/file_enumerator.h"
18 #include "base/files/file_util.h"
19 #include "base/files/memory_mapped_file.h"
20 #include "base/functional/bind.h"
21 #include "base/logging.h"
22 #include "base/metrics/histogram_base.h"
23 #include "base/metrics/histogram_functions.h"
24 #include "base/metrics/histogram_macros.h"
25 #include "base/metrics/persistent_histogram_allocator.h"
26 #include "base/metrics/persistent_memory_allocator.h"
27 #include "base/metrics/ranges_manager.h"
28 #include "base/strings/string_piece.h"
29 #include "base/strings/stringprintf.h"
30 #include "base/task/task_traits.h"
31 #include "base/task/thread_pool.h"
32 #include "base/time/time.h"
33 #include "components/metrics/metrics_features.h"
34 #include "components/metrics/metrics_log.h"
35 #include "components/metrics/metrics_pref_names.h"
36 #include "components/metrics/metrics_service.h"
37 #include "components/metrics/persistent_histograms.h"
38 #include "components/metrics/persistent_system_profile.h"
39 #include "components/prefs/pref_registry_simple.h"
40 #include "components/prefs/pref_service.h"
41 #include "components/prefs/scoped_user_pref_update.h"
42
43 namespace metrics {
44 namespace {
45
// These structures provide values used to define how files are opened and
// accessed. It obviates the need for multiple code-paths within several of
// the methods. One instance exists per source type in |kSourceOptions|, which
// is brace-initialized positionally, so the member order here is significant.
struct SourceOptions {
  // The flags to be used to open a file on disk (a combination of
  // base::File::FLAG_* values).
  int file_open_flags;

  // The access mode to be used when mapping a file into memory.
  base::MemoryMappedFile::Access memory_mapped_access;

  // Indicates if the file is to be accessed read-only.
  bool is_read_only;
};
59
// Opening a file typically requires at least these flags.
constexpr int STD_OPEN = base::File::FLAG_OPEN | base::File::FLAG_READ;

// Per-source-type access options. Entries are looked up by indexing with a
// SourceType value (e.g. kSourceOptions[source->type]), so the order of the
// entries below has to match the numeric order of that enum.
constexpr SourceOptions kSourceOptions[] = {
    // SOURCE_HISTOGRAMS_ATOMIC_FILE
    {
        // Ensure that no other process reads this at the same time.
        STD_OPEN | base::File::FLAG_WIN_EXCLUSIVE_READ,
        base::MemoryMappedFile::READ_ONLY,
        true,
    },
    // SOURCE_HISTOGRAMS_ATOMIC_DIR
    {
        // Ensure that no other process reads this at the same time.
        STD_OPEN | base::File::FLAG_WIN_EXCLUSIVE_READ,
        base::MemoryMappedFile::READ_ONLY,
        true,
    },
    // SOURCE_HISTOGRAMS_ACTIVE_FILE
    {
        // Allow writing to the file. This is needed so we can keep track of
        // deltas that have been uploaded (by modifying the file), while the
        // file may still be open by an external process (e.g. Crashpad).
        STD_OPEN | base::File::FLAG_WRITE,
        base::MemoryMappedFile::READ_WRITE,
        false,
    },
};
88
DeleteFileWhenPossible(const base::FilePath & path)89 void DeleteFileWhenPossible(const base::FilePath& path) {
90 // Open (with delete) and then immediately close the file by going out of
91 // scope. This is the only cross-platform safe way to delete a file that may
92 // be open elsewhere, a distinct possibility given the asynchronous nature
93 // of the delete task.
94 base::File file(path, base::File::FLAG_OPEN | base::File::FLAG_READ |
95 base::File::FLAG_DELETE_ON_CLOSE);
96 }
97
98 } // namespace
99
100 // This structure stores all the information about the sources being monitored
101 // and their current reporting state.
102 struct FileMetricsProvider::SourceInfo {
SourceInfometrics::FileMetricsProvider::SourceInfo103 explicit SourceInfo(const Params& params)
104 : type(params.type),
105 association(params.association),
106 prefs_key(params.prefs_key),
107 filter(params.filter),
108 max_age(params.max_age),
109 max_dir_kib(params.max_dir_kib),
110 max_dir_files(params.max_dir_files) {
111 switch (type) {
112 case SOURCE_HISTOGRAMS_ACTIVE_FILE:
113 DCHECK(prefs_key.empty());
114 [[fallthrough]];
115 case SOURCE_HISTOGRAMS_ATOMIC_FILE:
116 path = params.path;
117 break;
118 case SOURCE_HISTOGRAMS_ATOMIC_DIR:
119 directory = params.path;
120 break;
121 }
122 }
123
124 SourceInfo(const SourceInfo&) = delete;
125 SourceInfo& operator=(const SourceInfo&) = delete;
126
~SourceInfometrics::FileMetricsProvider::SourceInfo127 ~SourceInfo() {}
128
129 struct FoundFile {
130 base::FilePath path;
131 base::FileEnumerator::FileInfo info;
132 };
133 using FoundFiles = base::flat_map<base::Time, FoundFile>;
134
135 // How to access this source (file/dir, atomic/active).
136 const SourceType type;
137
138 // With what run this source is associated.
139 const SourceAssociation association;
140
141 // Where on disk the directory is located. This will only be populated when
142 // a directory is being monitored.
143 base::FilePath directory;
144
145 // The files found in the above directory, ordered by last-modified.
146 std::unique_ptr<FoundFiles> found_files;
147
148 // Where on disk the file is located. If a directory is being monitored,
149 // this will be updated for whatever file is being read.
150 base::FilePath path;
151
152 // Name used inside prefs to persistent metadata.
153 std::string prefs_key;
154
155 // The filter callback for determining what to do with found files.
156 FilterCallback filter;
157
158 // The maximum allowed age of a file.
159 base::TimeDelta max_age;
160
161 // The maximum allowed bytes in a directory.
162 size_t max_dir_kib;
163
164 // The maximum allowed files in a directory.
165 size_t max_dir_files;
166
167 // The last-seen time of this source to detect change.
168 base::Time last_seen;
169
170 // Indicates if the data has been read out or not.
171 bool read_complete = false;
172
173 // Once a file has been recognized as needing to be read, it is mapped
174 // into memory and assigned to an |allocator| object.
175 std::unique_ptr<base::PersistentHistogramAllocator> allocator;
176 };
177
// Captures the identifying settings of a source: where it lives on disk
// (|path|), what kind of source it is (|type|), which run it is associated
// with (|association|) and the name under which its state is kept in prefs
// (|prefs_key|). Members not listed in the initializer list are left to
// whatever defaults the Params declaration gives them.
FileMetricsProvider::Params::Params(const base::FilePath& path,
                                    SourceType type,
                                    SourceAssociation association,
                                    base::StringPiece prefs_key)
    : path(path), type(type), association(association), prefs_key(prefs_key) {}

FileMetricsProvider::Params::~Params() = default;
185
// Stores |local_state| as the pref service used for persisting per-source
// state and registers this object (through a weak pointer) as a histogram
// provider with base::StatisticsRecorder. Other methods null-check
// |pref_service_| before using it for per-source timestamps.
FileMetricsProvider::FileMetricsProvider(PrefService* local_state)
    : pref_service_(local_state) {
  base::StatisticsRecorder::RegisterHistogramProvider(
      weak_factory_.GetWeakPtr());
}

FileMetricsProvider::~FileMetricsProvider() = default;
193
RegisterSource(const Params & params)194 void FileMetricsProvider::RegisterSource(const Params& params) {
195 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
196
197 // Ensure that kSourceOptions has been filled for this type.
198 DCHECK_GT(std::size(kSourceOptions), static_cast<size_t>(params.type));
199
200 std::unique_ptr<SourceInfo> source(new SourceInfo(params));
201
202 // |prefs_key| may be empty if the caller does not wish to persist the
203 // state across instances of the program.
204 if (pref_service_ && !params.prefs_key.empty()) {
205 source->last_seen = pref_service_->GetTime(
206 metrics::prefs::kMetricsLastSeenPrefix + source->prefs_key);
207 }
208
209 switch (params.association) {
210 case ASSOCIATE_CURRENT_RUN:
211 case ASSOCIATE_INTERNAL_PROFILE:
212 case ASSOCIATE_INTERNAL_PROFILE_SAMPLES_COUNTER:
213 sources_to_check_.push_back(std::move(source));
214 break;
215 case ASSOCIATE_PREVIOUS_RUN:
216 case ASSOCIATE_INTERNAL_PROFILE_OR_PREVIOUS_RUN:
217 DCHECK_EQ(SOURCE_HISTOGRAMS_ATOMIC_FILE, source->type);
218 sources_for_previous_run_.push_back(std::move(source));
219 break;
220 }
221 }
222
223 // static
RegisterSourcePrefs(PrefRegistrySimple * prefs,const base::StringPiece prefs_key)224 void FileMetricsProvider::RegisterSourcePrefs(
225 PrefRegistrySimple* prefs,
226 const base::StringPiece prefs_key) {
227 prefs->RegisterInt64Pref(
228 metrics::prefs::kMetricsLastSeenPrefix + std::string(prefs_key), 0);
229 }
230
// Registers the list pref (kMetricsFileMetricsMetadata) to which
// AppendToSamplesCountPref() appends samples counts.
// static
void FileMetricsProvider::RegisterPrefs(PrefRegistrySimple* prefs) {
  prefs->RegisterListPref(metrics::prefs::kMetricsFileMetricsMetadata);
}
235
// Emits |result| to the "UMA.FileMetricsProvider.AccessResult" enumeration
// histogram, with ACCESS_RESULT_MAX as the enumeration boundary.
// static
void FileMetricsProvider::RecordAccessResult(AccessResult result) {
  UMA_HISTOGRAM_ENUMERATION("UMA.FileMetricsProvider.AccessResult", result,
                            ACCESS_RESULT_MAX);
}
241
// Selects the next file to read from the directory monitored by |source| and
// stores its path in |source->path|. The directory is only enumerated when
// |source->found_files| is null; otherwise the entries remaining from the
// earlier scan are consumed. Files that have already been seen, or that
// exceed the configured count/size/age limits, are deleted along the way.
// Returns true if a file passed the filter and is ready to be read, or false
// if no candidate remains.
// static
bool FileMetricsProvider::LocateNextFileInDirectory(SourceInfo* source) {
  DCHECK_EQ(SOURCE_HISTOGRAMS_ATOMIC_DIR, source->type);
  DCHECK(!source->directory.empty());

  // Cumulative directory stats. These will remain zero if the directory isn't
  // scanned but that's okay since any work they would cause to be done below
  // would have been done during the first call where the directory was fully
  // scanned.
  size_t total_size_kib = 0;  // Using KiB allows 4TiB even on 32-bit builds.
  size_t file_count = 0;

  base::Time now_time = base::Time::Now();
  if (!source->found_files) {
    source->found_files = std::make_unique<SourceInfo::FoundFiles>();
    base::FileEnumerator file_iter(source->directory, /*recursive=*/false,
                                   base::FileEnumerator::FILES);
    SourceInfo::FoundFile found_file;

    // Open the directory and find all the files, remembering the last-modified
    // time of each.
    for (found_file.path = file_iter.Next(); !found_file.path.empty();
         found_file.path = file_iter.Next()) {
      found_file.info = file_iter.GetInfo();

      // Ignore directories.
      if (found_file.info.IsDirectory())
        continue;

      // Ignore temporary files (names starting with '.' or '_').
      base::FilePath::CharType first_character =
          found_file.path.BaseName().value().front();
      if (first_character == FILE_PATH_LITERAL('.') ||
          first_character == FILE_PATH_LITERAL('_')) {
        continue;
      }

      // Ignore non-PMA (Persistent Memory Allocator) files.
      if (found_file.path.Extension() !=
          base::PersistentMemoryAllocator::kFileExtension) {
        continue;
      }

      // Process real files.
      total_size_kib += found_file.info.GetSize() >> 10;
      base::Time modified = found_file.info.GetLastModifiedTime();
      if (modified > source->last_seen) {
        // This file hasn't been read. Remember it (unless from the future).
        // |found_file| is moved-from here; its fields are reassigned at the
        // top of the next iteration before being read again.
        if (modified <= now_time)
          source->found_files->emplace(modified, std::move(found_file));
        ++file_count;
      } else {
        // This file has been read. Try to delete it. Ignore any errors because
        // the file may be un-removeable by this process. It could, for example,
        // have been created by a privileged process like setup.exe. Even if it
        // is not removed, it will continue to be ignored because of the older
        // modification time.
        base::DeleteFile(found_file.path);
      }
    }
  }

  // Filter files from the front (oldest first) until one is found for
  // processing.
  bool have_file = false;
  while (!source->found_files->empty()) {
    SourceInfo::FoundFile found =
        std::move(source->found_files->begin()->second);
    source->found_files->erase(source->found_files->begin());

    // A limit of zero (or a zero TimeDelta for |max_age|) disables the
    // corresponding check.
    bool too_many =
        source->max_dir_files > 0 && file_count > source->max_dir_files;
    bool too_big =
        source->max_dir_kib > 0 && total_size_kib > source->max_dir_kib;
    bool too_old =
        source->max_age != base::TimeDelta() &&
        now_time - found.info.GetLastModifiedTime() > source->max_age;
    if (too_many || too_big || too_old) {
      // Drop the file and update the running totals so that later files are
      // judged against what remains.
      base::DeleteFile(found.path);
      --file_count;
      total_size_kib -= found.info.GetSize() >> 10;
      RecordAccessResult(too_many  ? ACCESS_RESULT_TOO_MANY_FILES
                         : too_big ? ACCESS_RESULT_TOO_MANY_BYTES
                                   : ACCESS_RESULT_TOO_OLD);
      continue;
    }

    AccessResult result = HandleFilterSource(source, found.path);
    if (result == ACCESS_RESULT_SUCCESS) {
      source->path = std::move(found.path);
      have_file = true;
      break;
    }

    // Record the result. Success will be recorded by the caller.
    if (result != ACCESS_RESULT_THIS_PID)
      RecordAccessResult(result);
  }

  return have_file;
}
342
343 // static
FinishedWithSource(SourceInfo * source,AccessResult result)344 void FileMetricsProvider::FinishedWithSource(SourceInfo* source,
345 AccessResult result) {
346 // Different source types require different post-processing.
347 switch (source->type) {
348 case SOURCE_HISTOGRAMS_ATOMIC_FILE:
349 case SOURCE_HISTOGRAMS_ATOMIC_DIR:
350 // Done with this file so delete the allocator and its owned file.
351 source->allocator.reset();
352 // Remove the file if has been recorded. This prevents them from
353 // accumulating or also being recorded by different instances of
354 // the browser.
355 if (result == ACCESS_RESULT_SUCCESS ||
356 result == ACCESS_RESULT_NOT_MODIFIED ||
357 result == ACCESS_RESULT_MEMORY_DELETED ||
358 result == ACCESS_RESULT_TOO_OLD) {
359 DeleteFileWhenPossible(source->path);
360 }
361 break;
362 case SOURCE_HISTOGRAMS_ACTIVE_FILE:
363 // Keep the allocator open so it doesn't have to be re-mapped each
364 // time. This also allows the contents to be merged on-demand.
365 break;
366 }
367 }
368
// Checks every source in |sources| for new data and, where appropriate,
// merges or counts what it finds. Returns one samples count for each file
// successfully mapped from a source whose association is
// ASSOCIATE_INTERNAL_PROFILE_SAMPLES_COUNTER; the vector is empty if there
// were none.
// static
std::vector<size_t> FileMetricsProvider::CheckAndMergeMetricSourcesOnTaskRunner(
    SourceInfoList* sources) {
  // This method has all state information passed in |sources| and is intended
  // to run on a worker thread rather than the UI thread.
  std::vector<size_t> samples_counts;

  for (std::unique_ptr<SourceInfo>& source : *sources) {
    AccessResult result;
    do {
      result = CheckAndMapMetricSource(source.get());

      // Some results are not reported in order to keep the dashboard clean.
      if (result != ACCESS_RESULT_DOESNT_EXIST &&
          result != ACCESS_RESULT_NOT_MODIFIED &&
          result != ACCESS_RESULT_THIS_PID) {
        RecordAccessResult(result);
      }

      // If there are no files (or no more files) in this source, stop now.
      if (result == ACCESS_RESULT_DOESNT_EXIST)
        break;

      // Mapping was successful. Merge it.
      if (result == ACCESS_RESULT_SUCCESS) {
        // Metrics associated with internal profiles have to be fetched directly
        // so just keep the mapping for use by the main thread. Note that this
        // deliberately skips FinishedWithSource() so the allocator survives.
        if (source->association == ASSOCIATE_INTERNAL_PROFILE)
          break;

        if (source->association == ASSOCIATE_INTERNAL_PROFILE_SAMPLES_COUNTER) {
          // Only count the samples; nothing is merged for these sources.
          samples_counts.push_back(CollectFileMetadataFromSource(source.get()));
        } else {
          size_t histograms_count =
              MergeHistogramDeltasFromSource(source.get());
          // The per-source histogram name is derived from |prefs_key|, so it
          // can only be emitted for sources that have one.
          if (!source->prefs_key.empty()) {
            base::UmaHistogramCounts1000(
                base::StringPrintf(
                    "UMA.FileMetricsProvider.%s.MergedHistogramsCount",
                    source->prefs_key.c_str()),
                histograms_count);
          }
        }
        // Both helpers above set |read_complete| when they finish.
        DCHECK(source->read_complete);
      }

      // All done with this source.
      FinishedWithSource(source.get(), result);

      // If it's a directory, keep trying until a file is successfully opened.
      // When there are no more files, ACCESS_RESULT_DOESNT_EXIST will be
      // returned and the loop will exit above.
    } while (result != ACCESS_RESULT_SUCCESS && !source->directory.empty());

    // If the set of known files is empty, clear the object so the next run
    // will do a fresh scan of the directory.
    if (source->found_files && source->found_files->empty())
      source->found_files.reset();
  }

  return samples_counts;
}
431
// Validates the file behind |source| (locating the next one first when the
// source is a directory), maps it into memory and, on success, installs a
// PersistentHistogramAllocator for it in |source->allocator|. Returns
// ACCESS_RESULT_SUCCESS when the source is ready to be read; any other value
// identifies the reason the file was rejected.
// This method has all state information passed in |source| and is intended
// to run on a worker thread rather than the UI thread.
// static
FileMetricsProvider::AccessResult FileMetricsProvider::CheckAndMapMetricSource(
    SourceInfo* source) {
  // If source was read, clean up after it.
  if (source->read_complete)
    FinishedWithSource(source, ACCESS_RESULT_SUCCESS);
  source->read_complete = false;
  DCHECK(!source->allocator);

  // If the source is a directory, look for files within it.
  if (!source->directory.empty() && !LocateNextFileInDirectory(source))
    return ACCESS_RESULT_DOESNT_EXIST;

  // Do basic validation on the file metadata.
  base::File::Info info;
  if (!base::GetFileInfo(source->path, &info))
    return ACCESS_RESULT_DOESNT_EXIST;

  if (info.is_directory || info.size == 0)
    return ACCESS_RESULT_INVALID_FILE;

  if (source->last_seen >= info.last_modified)
    return ACCESS_RESULT_NOT_MODIFIED;
  // A zero |max_age| disables the age check.
  if (source->max_age != base::TimeDelta() &&
      base::Time::Now() - info.last_modified > source->max_age) {
    return ACCESS_RESULT_TOO_OLD;
  }

  // Non-directory files still need to be filtered. (Files from a directory
  // were already filtered by LocateNextFileInDirectory().)
  if (source->directory.empty()) {
    AccessResult result = HandleFilterSource(source, source->path);
    if (result != ACCESS_RESULT_SUCCESS)
      return result;
  }

  // A new file of metrics has been found.
  base::File file(source->path, kSourceOptions[source->type].file_open_flags);
  if (!file.IsValid())
    return ACCESS_RESULT_NO_OPEN;

  // Check that file is writable if that is expected. If a write is attempted
  // on an unwritable memory-mapped file, a SIGBUS will cause a crash.
  const bool read_only = kSourceOptions[source->type].is_read_only;
  if (!read_only) {
    // The probe below saves the first |kTestSize| bytes, overwrites them with
    // zeros, verifies the zeros can be read back, then restores the saved
    // bytes and verifies those as well.
    constexpr int kTestSize = 16;
    char header[kTestSize];
    int amount = file.Read(0, header, kTestSize);
    if (amount != kTestSize)
      return ACCESS_RESULT_INVALID_CONTENTS;

    char zeros[kTestSize] = {0};
    file.Write(0, zeros, kTestSize);
    file.Flush();

    // A crash here would be unfortunate as the file would be left invalid
    // and skipped/deleted by later attempts. This is unlikely, however, and
    // the benefit of avoiding crashes from mapping as read/write a file that
    // can't be written more than justifies the risk.

    char check[kTestSize];
    amount = file.Read(0, check, kTestSize);
    if (amount != kTestSize)
      return ACCESS_RESULT_INVALID_CONTENTS;
    if (memcmp(check, zeros, kTestSize) != 0)
      return ACCESS_RESULT_NOT_WRITABLE;

    file.Write(0, header, kTestSize);
    file.Flush();
    amount = file.Read(0, check, kTestSize);
    if (amount != kTestSize)
      return ACCESS_RESULT_INVALID_CONTENTS;
    if (memcmp(check, header, kTestSize) != 0)
      return ACCESS_RESULT_NOT_WRITABLE;
  }

  std::unique_ptr<base::MemoryMappedFile> mapped(new base::MemoryMappedFile());
  if (!mapped->Initialize(std::move(file),
                          kSourceOptions[source->type].memory_mapped_access)) {
    return ACCESS_RESULT_SYSTEM_MAP_FAILURE;
  }

  // Ensure any problems below don't occur repeatedly.
  source->last_seen = info.last_modified;

  // Test the validity of the file contents.
  if (!base::FilePersistentMemoryAllocator::IsFileAcceptable(*mapped,
                                                             read_only)) {
    return ACCESS_RESULT_INVALID_CONTENTS;
  }

  // Map the file and validate it.
  std::unique_ptr<base::FilePersistentMemoryAllocator> memory_allocator =
      std::make_unique<base::FilePersistentMemoryAllocator>(
          std::move(mapped), 0, 0, base::StringPiece(),
          read_only ? base::FilePersistentMemoryAllocator::kReadOnly
                    : base::FilePersistentMemoryAllocator::kReadWriteExisting);
  if (memory_allocator->GetMemoryState() ==
      base::PersistentMemoryAllocator::MEMORY_DELETED) {
    return ACCESS_RESULT_MEMORY_DELETED;
  }
  if (memory_allocator->IsCorrupt())
    return ACCESS_RESULT_DATA_CORRUPTION;

  // Cache the file data while running in a background thread so that there
  // shouldn't be any I/O when the data is accessed from the main thread.
  // Files with an internal profile, those from previous runs that include
  // a full system profile and are fetched via ProvideIndependentMetrics(),
  // are loaded on a background task and so there's no need to cache the
  // data in advance.
  if (source->association != ASSOCIATE_INTERNAL_PROFILE)
    memory_allocator->Cache();

  // Create an allocator for the mapped file. Ownership passes to the allocator.
  source->allocator = std::make_unique<base::PersistentHistogramAllocator>(
      std::move(memory_allocator));
  // Pass a custom RangesManager so that we do not register the BucketRanges
  // with the global StatisticsRecorder when creating histogram objects using
  // the allocator's underlying data. This avoids unnecessary contention on the
  // global StatisticsRecorder lock.
  // Note: Since RangesManager is not thread safe, this means that |allocator|
  // must be iterated over one thread at a time (i.e., not concurrently). This
  // is the case.
  source->allocator->SetRangesManager(new base::RangesManager());

  // Check that an "independent" file has the necessary information present.
  // Passing nullptr only tests for the presence of a system profile without
  // extracting it.
  if (source->association == ASSOCIATE_INTERNAL_PROFILE &&
      !PersistentSystemProfile::GetSystemProfile(
          *source->allocator->memory_allocator(), nullptr)) {
    return ACCESS_RESULT_NO_PROFILE;
  }

  return ACCESS_RESULT_SUCCESS;
}
567
568 // static
MergeHistogramDeltasFromSource(SourceInfo * source)569 size_t FileMetricsProvider::MergeHistogramDeltasFromSource(SourceInfo* source) {
570 DCHECK(source->allocator);
571 base::PersistentHistogramAllocator::Iterator histogram_iter(
572 source->allocator.get());
573
574 const bool read_only = kSourceOptions[source->type].is_read_only;
575 size_t histogram_count = 0;
576 while (true) {
577 std::unique_ptr<base::HistogramBase> histogram = histogram_iter.GetNext();
578 if (!histogram)
579 break;
580
581 if (read_only) {
582 source->allocator->MergeHistogramFinalDeltaToStatisticsRecorder(
583 histogram.get());
584 } else {
585 source->allocator->MergeHistogramDeltaToStatisticsRecorder(
586 histogram.get());
587 }
588 ++histogram_count;
589 }
590
591 source->read_complete = true;
592 DVLOG(1) << "Reported " << histogram_count << " histograms from "
593 << source->path.value();
594 return histogram_count;
595 }
596
597 // static
RecordHistogramSnapshotsFromSource(base::HistogramSnapshotManager * snapshot_manager,SourceInfo * source,base::HistogramBase::Flags required_flags)598 void FileMetricsProvider::RecordHistogramSnapshotsFromSource(
599 base::HistogramSnapshotManager* snapshot_manager,
600 SourceInfo* source,
601 base::HistogramBase::Flags required_flags) {
602 DCHECK_NE(SOURCE_HISTOGRAMS_ACTIVE_FILE, source->type);
603
604 base::PersistentHistogramAllocator::Iterator histogram_iter(
605 source->allocator.get());
606
607 int histogram_count = 0;
608 while (true) {
609 std::unique_ptr<base::HistogramBase> histogram = histogram_iter.GetNext();
610 if (!histogram)
611 break;
612 if (histogram->HasFlags(required_flags)) {
613 snapshot_manager->PrepareFinalDelta(histogram.get());
614 ++histogram_count;
615 }
616 }
617
618 source->read_complete = true;
619 DVLOG(1) << "Reported " << histogram_count << " histograms from "
620 << source->path.value();
621 }
622
HandleFilterSource(SourceInfo * source,const base::FilePath & path)623 FileMetricsProvider::AccessResult FileMetricsProvider::HandleFilterSource(
624 SourceInfo* source,
625 const base::FilePath& path) {
626 if (!source->filter)
627 return ACCESS_RESULT_SUCCESS;
628
629 // Alternatively, pass a Params object to the filter like what was originally
630 // used to configure the source.
631 // Params params(path, source->type, source->association, source->prefs_key);
632 FilterAction action = source->filter.Run(path);
633 switch (action) {
634 case FILTER_PROCESS_FILE:
635 // Process the file.
636 return ACCESS_RESULT_SUCCESS;
637
638 case FILTER_ACTIVE_THIS_PID:
639 // Even the file for the current process has to be touched or its stamp
640 // will be less than "last processed" and thus skipped on future runs,
641 // even those done by new instances of the browser if a pref key is
642 // provided so that the last-uploaded stamp is recorded.
643 case FILTER_TRY_LATER: {
644 // Touch the file with the current timestamp making it (presumably) the
645 // newest file in the directory.
646 base::Time now = base::Time::Now();
647 base::TouchFile(path, /*accessed=*/now, /*modified=*/now);
648 if (action == FILTER_ACTIVE_THIS_PID)
649 return ACCESS_RESULT_THIS_PID;
650 return ACCESS_RESULT_FILTER_TRY_LATER;
651 }
652
653 case FILTER_SKIP_FILE:
654 switch (source->type) {
655 case SOURCE_HISTOGRAMS_ATOMIC_FILE:
656 case SOURCE_HISTOGRAMS_ATOMIC_DIR:
657 // Only "atomic" files are deleted (best-effort).
658 DeleteFileWhenPossible(path);
659 break;
660 case SOURCE_HISTOGRAMS_ACTIVE_FILE:
661 // File will presumably get modified elsewhere and thus tried again.
662 break;
663 }
664 return ACCESS_RESULT_FILTER_SKIP_FILE;
665 }
666
667 // Code never gets here but some compilers don't realize that and so complain
668 // that "not all control paths return a value".
669 NOTREACHED();
670 return ACCESS_RESULT_SUCCESS;
671 }
672
673 /* static */
ProvideIndependentMetricsOnTaskRunner(SourceInfo * source,ChromeUserMetricsExtension * uma_proto,base::HistogramSnapshotManager * snapshot_manager,base::OnceClosure serialize_log_callback)674 bool FileMetricsProvider::ProvideIndependentMetricsOnTaskRunner(
675 SourceInfo* source,
676 ChromeUserMetricsExtension* uma_proto,
677 base::HistogramSnapshotManager* snapshot_manager,
678 base::OnceClosure serialize_log_callback) {
679 // Include various crash keys about the file/allocator being read so that if
680 // there is ever a crash report being dumped while reading its contents, we
681 // have some info about its state.
682 // TODO(crbug.com/1432981): Clean this up.
683
684 // Useful to know the metadata version of the source (e.g. to know if some
685 // fields like memory_state below are up to date).
686 SCOPED_CRASH_KEY_NUMBER("PMA", "version",
687 source->allocator->memory_allocator()->version());
688 // Useful to know whether the source comes from a crashed session.
689 SCOPED_CRASH_KEY_NUMBER(
690 "PMA", "memory_state",
691 source->allocator->memory_allocator()->GetMemoryState());
692 // Useful to know the freeptr as it can help determine if the source comes
693 // from a session that crashed due to failing to allocate an object across
694 // different pages.
695 SCOPED_CRASH_KEY_NUMBER("PMA", "freeptr",
696 source->allocator->memory_allocator()->freeptr());
697 SCOPED_CRASH_KEY_BOOL("PMA", "full",
698 source->allocator->memory_allocator()->IsFull());
699 SCOPED_CRASH_KEY_BOOL("PMA", "corrupt",
700 source->allocator->memory_allocator()->IsCorrupt());
701
702 SystemProfileProto* system_profile_proto =
703 uma_proto->mutable_system_profile();
704
705 if (PersistentSystemProfile::GetSystemProfile(
706 *source->allocator->memory_allocator(), system_profile_proto)) {
707 system_profile_proto->mutable_stability()->set_from_previous_run(true);
708 RecordHistogramSnapshotsFromSource(
709 snapshot_manager, source,
710 /*required_flags=*/base::HistogramBase::kUmaTargetedHistogramFlag);
711
712 // NOTE: If you are adding anything here, consider also changing
713 // MetricsStateMetricsProvider::ProvidePreviousSessionData().
714
715 // Use the client UUID stored in the system profile (if there is one) as the
716 // independent log's client ID. Usually, this has no effect, but there are
717 // scenarios where the log may have come from a session that had a different
718 // client ID than the one currently in use (e.g., client ID was reset due to
719 // being detected as a cloned install), so make sure to associate it with
720 // the proper one.
721 const std::string& client_uuid = system_profile_proto->client_uuid();
722 if (!client_uuid.empty()) {
723 uma_proto->set_client_id(MetricsLog::Hash(client_uuid));
724 }
725
726 // Serialize the log while we are still in the background, instead of on the
727 // callback that runs on the main thread.
728 std::move(serialize_log_callback).Run();
729
730 return true;
731 }
732
733 return false;
734 }
735
AppendToSamplesCountPref(std::vector<size_t> samples_counts)736 void FileMetricsProvider::AppendToSamplesCountPref(
737 std::vector<size_t> samples_counts) {
738 ScopedListPrefUpdate update(pref_service_,
739 metrics::prefs::kMetricsFileMetricsMetadata);
740 for (size_t samples_count : samples_counts) {
741 update->Append(static_cast<int>(samples_count));
742 }
743 }
744
745 // static
CollectFileMetadataFromSource(SourceInfo * source)746 size_t FileMetricsProvider::CollectFileMetadataFromSource(SourceInfo* source) {
747 base::HistogramBase::Count samples_count = 0;
748 base::PersistentHistogramAllocator::Iterator it{source->allocator.get()};
749 std::unique_ptr<base::HistogramBase> histogram;
750 while ((histogram = it.GetNext()) != nullptr) {
751 samples_count += histogram->SnapshotFinalDelta()->TotalCount();
752 }
753 source->read_complete = true;
754 return samples_count;
755 }
756
ScheduleSourcesCheck()757 void FileMetricsProvider::ScheduleSourcesCheck() {
758 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
759
760 if (sources_to_check_.empty())
761 return;
762
763 // Create an independent list of sources for checking. This will be Owned()
764 // by the reply call given to the task-runner, to be deleted when that call
765 // has returned. It is also passed Unretained() to the task itself, safe
766 // because that must complete before the reply runs.
767 SourceInfoList* check_list = new SourceInfoList();
768 std::swap(sources_to_check_, *check_list);
769 base::ThreadPool::PostTaskAndReplyWithResult(
770 FROM_HERE,
771 {base::MayBlock(), base::TaskPriority::BEST_EFFORT,
772 // SKIP_ON_SHUTDOWN because the task must be run to completion once
773 // started. Since the task may merge metrics from files on disk, the task
774 // should be completed so that those files are deleted (to prevent
775 // re-merging them in another session, which would cause duplication).
776 base::TaskShutdownBehavior::SKIP_ON_SHUTDOWN},
777 base::BindOnce(
778 &FileMetricsProvider::CheckAndMergeMetricSourcesOnTaskRunner,
779 base::Unretained(check_list)),
780 base::BindOnce(&FileMetricsProvider::RecordSourcesChecked,
781 weak_factory_.GetWeakPtr(), base::Owned(check_list)));
782 }
783
// Reply half of ScheduleSourcesCheck(): persists |samples_counts|, records
// which sources in |checked| were read and moves every source out of
// |checked| into the member list matching its new state. Schedules another
// check right away if anything was read.
void FileMetricsProvider::RecordSourcesChecked(
    SourceInfoList* checked,
    std::vector<size_t> samples_counts) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);

  AppendToSamplesCountPref(std::move(samples_counts));

  // Sources that still have an allocator at this point are read/write "active"
  // files that may need their contents merged on-demand. If there is no
  // allocator (not a read/write file) but a read was done on the task-runner,
  // try again immediately to see if more is available (in a directory of
  // files). Otherwise, remember the source for checking again at a later time.
  bool did_read = false;
  for (auto iter = checked->begin(); iter != checked->end();) {
    // Advance |iter| before splicing: every branch below moves the element at
    // |temp| out of |checked|, so |temp| no longer belongs to this list once
    // the splice has happened.
    auto temp = iter++;
    SourceInfo* source = temp->get();
    if (source->read_complete) {
      RecordSourceAsRead(source);
      did_read = true;
    }
    if (source->allocator) {
      if (source->association == ASSOCIATE_INTERNAL_PROFILE) {
        sources_with_profile_.splice(sources_with_profile_.end(), *checked,
                                     temp);
      } else {
        sources_mapped_.splice(sources_mapped_.end(), *checked, temp);
      }
    } else {
      sources_to_check_.splice(sources_to_check_.end(), *checked, temp);
    }
  }

  // If a read was done, schedule another one immediately. In the case of a
  // directory of files, this ensures that all entries get processed. It's
  // done here instead of as a loop in CheckAndMergeMetricSourcesOnTaskRunner
  // so that (a) it gives the disk a rest and (b) testing of individual reads
  // is possible.
  if (did_read)
    ScheduleSourcesCheck();
}
824
DeleteFileAsync(const base::FilePath & path)825 void FileMetricsProvider::DeleteFileAsync(const base::FilePath& path) {
826 base::ThreadPool::PostTask(
827 FROM_HERE,
828 {base::MayBlock(), base::TaskPriority::BEST_EFFORT,
829 // CONTINUE_ON_SHUTDOWN because files that are scheduled to be deleted
830 // asynchronously are not guaranteed to be deleted this session anyway,
831 // so no need to block shutdown if the task has already started running.
832 // Further, for such files, there are different ways to ensure they won't
833 // be consumed again (i.e., prefs).
834 base::TaskShutdownBehavior::CONTINUE_ON_SHUTDOWN},
835 base::BindOnce(DeleteFileWhenPossible, path));
836 }
837
RecordSourceAsRead(SourceInfo * source)838 void FileMetricsProvider::RecordSourceAsRead(SourceInfo* source) {
839 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
840
841 // Persistently record the "last seen" timestamp of the source file to
842 // ensure that the file is never read again unless it is modified again.
843 if (pref_service_ && !source->prefs_key.empty()) {
844 pref_service_->SetTime(
845 metrics::prefs::kMetricsLastSeenPrefix + source->prefs_key,
846 source->last_seen);
847 }
848 }
849
OnDidCreateMetricsLog()850 void FileMetricsProvider::OnDidCreateMetricsLog() {
851 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
852
853 // Schedule a check to see if there are new metrics to load. If so, they will
854 // be reported during the next collection run after this one. The check is run
855 // off of a MayBlock() TaskRunner so as to not cause delays on the main UI
856 // thread (which is currently where metric collection is done).
857 ScheduleSourcesCheck();
858
859 // Clear any data for initial metrics since they're always reported
860 // before the first call to this method. It couldn't be released after
861 // being reported in RecordInitialHistogramSnapshots because the data
862 // will continue to be used by the caller after that method returns. Once
863 // here, though, all actions to be done on the data have been completed.
864 for (const std::unique_ptr<SourceInfo>& source : sources_for_previous_run_)
865 DeleteFileAsync(source->path);
866 sources_for_previous_run_.clear();
867 }
868
HasIndependentMetrics()869 bool FileMetricsProvider::HasIndependentMetrics() {
870 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
871 return !sources_with_profile_.empty() || SimulateIndependentMetrics();
872 }
873
ProvideIndependentMetrics(base::OnceClosure serialize_log_callback,base::OnceCallback<void (bool)> done_callback,ChromeUserMetricsExtension * uma_proto,base::HistogramSnapshotManager * snapshot_manager)874 void FileMetricsProvider::ProvideIndependentMetrics(
875 base::OnceClosure serialize_log_callback,
876 base::OnceCallback<void(bool)> done_callback,
877 ChromeUserMetricsExtension* uma_proto,
878 base::HistogramSnapshotManager* snapshot_manager) {
879 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
880
881 if (sources_with_profile_.empty()) {
882 std::move(done_callback).Run(false);
883 return;
884 }
885
886 std::unique_ptr<SourceInfo> source =
887 std::move(*sources_with_profile_.begin());
888 sources_with_profile_.pop_front();
889 SourceInfo* source_ptr = source.get();
890 DCHECK(source->allocator);
891
892 // Do the actual work as a background task.
893 base::ThreadPool::PostTaskAndReplyWithResult(
894 FROM_HERE,
895 {base::MayBlock(), base::TaskPriority::BEST_EFFORT,
896 // CONTINUE_ON_SHUTDOWN because the work done is only useful once the
897 // reply task is run (and there are no side effects). So, no need to
898 // block shutdown since the reply task won't be run anyway.
899 base::TaskShutdownBehavior::CONTINUE_ON_SHUTDOWN},
900 base::BindOnce(
901 &FileMetricsProvider::ProvideIndependentMetricsOnTaskRunner,
902 source_ptr, uma_proto, snapshot_manager,
903 std::move(serialize_log_callback)),
904 base::BindOnce(&FileMetricsProvider::ProvideIndependentMetricsCleanup,
905 weak_factory_.GetWeakPtr(), std::move(done_callback),
906 std::move(source)));
907 }
908
ProvideIndependentMetricsCleanup(base::OnceCallback<void (bool)> done_callback,std::unique_ptr<SourceInfo> source,bool success)909 void FileMetricsProvider::ProvideIndependentMetricsCleanup(
910 base::OnceCallback<void(bool)> done_callback,
911 std::unique_ptr<SourceInfo> source,
912 bool success) {
913 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
914
915 // Regardless of whether this source was successfully recorded, it is
916 // never read again.
917 source->read_complete = true;
918 RecordSourceAsRead(source.get());
919 sources_to_check_.push_back(std::move(source));
920 ScheduleSourcesCheck();
921
922 std::move(done_callback).Run(success);
923 }
924
HasPreviousSessionData()925 bool FileMetricsProvider::HasPreviousSessionData() {
926 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
927
928 // Check all sources for previous run to see if they need to be read.
929 for (auto iter = sources_for_previous_run_.begin();
930 iter != sources_for_previous_run_.end();) {
931 auto temp = iter++;
932 SourceInfo* source = temp->get();
933
934 // This would normally be done on a background I/O thread but there
935 // hasn't been a chance to run any at the time this method is called.
936 // Do the check in-line.
937 AccessResult result = CheckAndMapMetricSource(source);
938 UMA_HISTOGRAM_ENUMERATION("UMA.FileMetricsProvider.InitialAccessResult",
939 result, ACCESS_RESULT_MAX);
940
941 // If it couldn't be accessed, remove it from the list. There is only ever
942 // one chance to record it so no point keeping it around for later. Also
943 // mark it as having been read since uploading it with a future browser
944 // run would associate it with the then-previous run which would no longer
945 // be the run from which it came.
946 if (result != ACCESS_RESULT_SUCCESS) {
947 DCHECK(!source->allocator);
948 RecordSourceAsRead(source);
949 DeleteFileAsync(source->path);
950 sources_for_previous_run_.erase(temp);
951 continue;
952 }
953
954 DCHECK(source->allocator);
955
956 // If the source should be associated with an existing internal profile,
957 // move it to |sources_with_profile_| for later upload.
958 if (source->association == ASSOCIATE_INTERNAL_PROFILE_OR_PREVIOUS_RUN) {
959 if (PersistentSystemProfile::HasSystemProfile(
960 *source->allocator->memory_allocator())) {
961 sources_with_profile_.splice(sources_with_profile_.end(),
962 sources_for_previous_run_, temp);
963 }
964 }
965 }
966
967 return !sources_for_previous_run_.empty();
968 }
969
RecordInitialHistogramSnapshots(base::HistogramSnapshotManager * snapshot_manager)970 void FileMetricsProvider::RecordInitialHistogramSnapshots(
971 base::HistogramSnapshotManager* snapshot_manager) {
972 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
973
974 for (const std::unique_ptr<SourceInfo>& source : sources_for_previous_run_) {
975 // The source needs to have an allocator attached to it in order to read
976 // histograms out of it.
977 DCHECK(!source->read_complete);
978 DCHECK(source->allocator);
979
980 // Dump all stability histograms contained within the source to the
981 // snapshot-manager.
982 RecordHistogramSnapshotsFromSource(
983 snapshot_manager, source.get(),
984 /*required_flags=*/base::HistogramBase::kUmaStabilityHistogramFlag);
985
986 // Update the last-seen time so it isn't read again unless it changes.
987 RecordSourceAsRead(source.get());
988 }
989 }
990
MergeHistogramDeltas(bool async,base::OnceClosure done_callback)991 void FileMetricsProvider::MergeHistogramDeltas(
992 bool async,
993 base::OnceClosure done_callback) {
994 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
995 // TODO(crbug.com/1293026): Consider if this work can be done asynchronously.
996 for (std::unique_ptr<SourceInfo>& source : sources_mapped_) {
997 MergeHistogramDeltasFromSource(source.get());
998 }
999 std::move(done_callback).Run();
1000 }
1001
SimulateIndependentMetrics()1002 bool FileMetricsProvider::SimulateIndependentMetrics() {
1003 if (!pref_service_->HasPrefPath(
1004 metrics::prefs::kMetricsFileMetricsMetadata)) {
1005 return false;
1006 }
1007
1008 ScopedListPrefUpdate list_pref(pref_service_,
1009 metrics::prefs::kMetricsFileMetricsMetadata);
1010 base::Value::List& list_value = list_pref.Get();
1011 if (list_value.empty())
1012 return false;
1013
1014 size_t count = pref_service_->GetInteger(
1015 metrics::prefs::kStabilityFileMetricsUnsentSamplesCount);
1016 pref_service_->SetInteger(
1017 metrics::prefs::kStabilityFileMetricsUnsentSamplesCount,
1018 list_value[0].GetInt() + count);
1019 pref_service_->SetInteger(
1020 metrics::prefs::kStabilityFileMetricsUnsentFilesCount,
1021 list_value.size() - 1);
1022 list_value.erase(list_value.begin());
1023
1024 return true;
1025 }
1026
1027 } // namespace metrics
1028