1 // Copyright 2015 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/metrics/call_stacks/call_stack_profile_metrics_provider.h"
6
7 #include <utility>
8 #include <vector>
9
10 #include "base/check.h"
11 #include "base/feature_list.h"
12 #include "base/functional/bind.h"
13 #include "base/no_destructor.h"
14 #include "base/ranges/algorithm.h"
15 #include "base/synchronization/lock.h"
16 #include "base/thread_annotations.h"
17 #include "base/time/time.h"
18 #include "sampled_profile.pb.h"
19 #include "third_party/metrics_proto/chrome_user_metrics_extension.pb.h"
20
21 namespace metrics {
22
23 namespace {
24
// Default state of the kSamplingProfilerReporting feature. It is kept as a
// named constant because it is used in two places: the feature definition
// itself, and the fallback in SamplingProfilerReportingEnabled() for the case
// where no FeatureList has been registered yet.
constexpr base::FeatureState kSamplingProfilerReportingDefaultState =
    base::FEATURE_ENABLED_BY_DEFAULT;
27
SamplingProfilerReportingEnabled()28 bool SamplingProfilerReportingEnabled() {
29 // TODO(crbug.com/40246378): Do not call this function before the FeatureList
30 // is registered.
31 if (!base::FeatureList::GetInstance()) {
32 // The FeatureList is not registered: use the feature's default state. This
33 // means that any override from the command line or variations service is
34 // ignored.
35 return kSamplingProfilerReportingDefaultState ==
36 base::FEATURE_ENABLED_BY_DEFAULT;
37 }
38 return base::FeatureList::IsEnabled(kSamplingProfilerReporting);
39 }
40
// Upper bound on the number of serialized profiles held while waiting for an
// upload. The cap avoids excessive performance overhead due to profile
// deserialization when profile uploads are delayed (e.g. due to being
// offline). Capping at this threshold loses approximately 0.5% of profiles on
// canary and dev.
//
// Declared constexpr so that it is a true compile-time constant, like the
// other constant in this file.
//
// TODO(wittman): Remove this threshold after crbug.com/903972 is fixed.
constexpr size_t kMaxPendingProfiles = 1250;
48
49 // Provides access to the singleton interceptor callback instance for CPU
50 // profiles. Accessed asynchronously on the profiling thread after profiling has
51 // been started.
52 CallStackProfileMetricsProvider::InterceptorCallback&
GetCpuInterceptorCallbackInstance()53 GetCpuInterceptorCallbackInstance() {
54 static base::NoDestructor<
55 CallStackProfileMetricsProvider::InterceptorCallback>
56 instance;
57 return *instance;
58 }
59
// PendingProfiles ------------------------------------------------------------

// Singleton class responsible for retaining profiles received from
// CallStackProfileBuilder. These are then sent to UMA on the invocation of
// CallStackProfileMetricsProvider::ProvideCurrentSessionData(). We need to
// store the profiles outside of a CallStackProfileMetricsProvider instance
// since callers may start profiling before the CallStackProfileMetricsProvider
// is created.
//
// Profiles are retained in serialized form and are only deserialized when
// they are retrieved.
//
// Member functions on this class may be called on any thread; all mutable
// state is guarded by |lock_|.
class PendingProfiles {
 public:
  // Returns the singleton instance. The instance is never destroyed.
  static PendingProfiles* GetInstance();

  PendingProfiles(const PendingProfiles&) = delete;
  PendingProfiles& operator=(const PendingProfiles&) = delete;

  // Retrieves all the pending profiles in deserialized form and leaves the
  // pending set empty. Profiles that fail to deserialize are dropped.
  std::vector<SampledProfile> RetrieveProfiles();

  // Enables the collection of profiles by MaybeCollect*Profile if |enabled| is
  // true. Otherwise, clears the currently collected profiles and ignores
  // profiles provided to future invocations of MaybeCollect*Profile. In both
  // cases the time of the transition is recorded so that profiles spanning the
  // transition can be rejected later.
  void SetCollectionEnabled(bool enabled);

  // Collects |profile|. It may be stored in a serialized form, or ignored,
  // depending on the pre-defined storage capacity and whether collection is
  // enabled. |profile| is not const& because it must be passed with std::move.
  // |profile_start_time| is the time at which collection of |profile| began.
  void MaybeCollectProfile(base::TimeTicks profile_start_time,
                           SampledProfile profile);

  // Collects |serialized_profile|. It may be ignored depending on the
  // pre-defined storage capacity and whether collection is enabled.
  // |serialized_profile| must be passed with std::move because it could be very
  // large. |profile_start_time| is the time at which collection of the profile
  // began.
  void MaybeCollectSerializedProfile(base::TimeTicks profile_start_time,
                                     std::string&& serialized_profile);

#if BUILDFLAG(IS_CHROMEOS)
  // Returns all the serialized profiles that have been collected but not yet
  // retrieved. For thread-safety reasons, returns a copy, so this is an
  // expensive function. Fortunately, it's only called during ChromeOS tast
  // integration tests.
  std::vector<std::string> GetUnretrievedProfiles() {
    base::AutoLock scoped_lock(lock_);
    return serialized_profiles_;
  }
#endif  // BUILDFLAG(IS_CHROMEOS)

  // Allows testing against the initial state multiple times: re-enables
  // collection, forgets the recorded enable/disable times and drops all
  // pending profiles.
  void ResetToDefaultStateForTesting();

 private:
  // base::NoDestructor needs access to the private constructor.
  friend class base::NoDestructor<PendingProfiles>;

  PendingProfiles();
  // The singleton is never destroyed.
  ~PendingProfiles() = delete;

  // Returns true if collection is enabled for a given profile based on its
  // |profile_start_time|. The |lock_| must be held prior to calling this
  // method.
  bool IsCollectionEnabledForProfile(base::TimeTicks profile_start_time) const
      EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Guards every data member below. Mutable so that const member functions can
  // assert that it is held.
  mutable base::Lock lock_;

  // If true, profiles provided to MaybeCollect*Profile should be collected.
  // Otherwise they will be ignored.
  // |collection_enabled_| is initialized to true to collect any profiles that
  // are generated prior to creation of the CallStackProfileMetricsProvider.
  // The ultimate disposition of these pre-creation collected profiles will be
  // determined by the initial recording state provided to
  // CallStackProfileMetricsProvider.
  bool collection_enabled_ GUARDED_BY(lock_) = true;

  // The last time collection was disabled. Used to determine if collection was
  // disabled at any point since a profile was started. Null until collection
  // is disabled for the first time.
  base::TimeTicks last_collection_disable_time_ GUARDED_BY(lock_);

  // The last time collection was enabled. Used to determine if collection was
  // enabled at any point since a profile was started. Null until collection is
  // explicitly enabled for the first time.
  base::TimeTicks last_collection_enable_time_ GUARDED_BY(lock_);

  // The set of completed serialized profiles that should be reported.
  std::vector<std::string> serialized_profiles_ GUARDED_BY(lock_);
};
146
147 // static
GetInstance()148 PendingProfiles* PendingProfiles::GetInstance() {
149 // Singleton for performance rather than correctness reasons.
150 static base::NoDestructor<PendingProfiles> instance;
151 return instance.get();
152 }
153
RetrieveProfiles()154 std::vector<SampledProfile> PendingProfiles::RetrieveProfiles() {
155 std::vector<std::string> serialized_profiles;
156
157 {
158 base::AutoLock scoped_lock(lock_);
159 serialized_profiles.swap(serialized_profiles_);
160 }
161
162 // Deserialize all serialized profiles, skipping over any that fail to parse.
163 std::vector<SampledProfile> profiles;
164 profiles.reserve(serialized_profiles.size());
165 for (const auto& serialized_profile : serialized_profiles) {
166 SampledProfile profile;
167 if (profile.ParseFromString(serialized_profile)) {
168 profiles.push_back(std::move(profile));
169 }
170 }
171
172 return profiles;
173 }
174
SetCollectionEnabled(bool enabled)175 void PendingProfiles::SetCollectionEnabled(bool enabled) {
176 base::AutoLock scoped_lock(lock_);
177
178 collection_enabled_ = enabled;
179
180 if (!collection_enabled_) {
181 serialized_profiles_.clear();
182 last_collection_disable_time_ = base::TimeTicks::Now();
183 } else {
184 last_collection_enable_time_ = base::TimeTicks::Now();
185 }
186 }
187
IsCollectionEnabledForProfile(base::TimeTicks profile_start_time) const188 bool PendingProfiles::IsCollectionEnabledForProfile(
189 base::TimeTicks profile_start_time) const {
190 lock_.AssertAcquired();
191
192 // Scenario 1: return false if collection is disabled.
193 if (!collection_enabled_)
194 return false;
195
196 // Scenario 2: return false if collection is disabled after the start of
197 // collection for this profile.
198 if (!last_collection_disable_time_.is_null() &&
199 last_collection_disable_time_ >= profile_start_time) {
200 return false;
201 }
202
203 // Scenario 3: return false if collection is disabled before the start of
204 // collection and re-enabled after the start. Note that this is different from
205 // scenario 1 where re-enabling never happens.
206 if (!last_collection_disable_time_.is_null() &&
207 !last_collection_enable_time_.is_null() &&
208 last_collection_enable_time_ >= profile_start_time) {
209 return false;
210 }
211
212 return true;
213 }
214
MaybeCollectProfile(base::TimeTicks profile_start_time,SampledProfile profile)215 void PendingProfiles::MaybeCollectProfile(base::TimeTicks profile_start_time,
216 SampledProfile profile) {
217 {
218 base::AutoLock scoped_lock(lock_);
219
220 if (!IsCollectionEnabledForProfile(profile_start_time))
221 return;
222 }
223
224 // Serialize the profile without holding the lock.
225 std::string serialized_profile;
226 profile.SerializeToString(&serialized_profile);
227
228 MaybeCollectSerializedProfile(profile_start_time,
229 std::move(serialized_profile));
230 }
231
MaybeCollectSerializedProfile(base::TimeTicks profile_start_time,std::string && serialized_profile)232 void PendingProfiles::MaybeCollectSerializedProfile(
233 base::TimeTicks profile_start_time,
234 std::string&& serialized_profile) {
235 base::AutoLock scoped_lock(lock_);
236
237 // There is no room for additional profiles.
238 if (serialized_profiles_.size() >= kMaxPendingProfiles)
239 return;
240
241 if (IsCollectionEnabledForProfile(profile_start_time))
242 serialized_profiles_.push_back(std::move(serialized_profile));
243 }
244
ResetToDefaultStateForTesting()245 void PendingProfiles::ResetToDefaultStateForTesting() {
246 base::AutoLock scoped_lock(lock_);
247
248 collection_enabled_ = true;
249 last_collection_disable_time_ = base::TimeTicks();
250 last_collection_enable_time_ = base::TimeTicks();
251 serialized_profiles_.clear();
252 }
253
// Defaulted out of line. Members rely on their in-class initializers and
// default constructors, so collection starts out enabled with no pending
// profiles and null enable/disable times.
PendingProfiles::PendingProfiles() = default;
255
256 #if BUILDFLAG(IS_CHROMEOS)
// A class that records the number of minimally-successful profiles received
// over time. In ChromeOS, this is used by the ui.StackSampledMetrics tast
// integration test to confirm that stack-sampled metrics are working on
// all the various ChromeOS boards.
//
// The stored counts are guarded by |lock_|.
class ReceivedProfileCounter {
 public:
  // Returns the singleton instance. The instance is never destroyed.
  static ReceivedProfileCounter* GetInstance();

  ReceivedProfileCounter(const ReceivedProfileCounter&) = delete;
  ReceivedProfileCounter& operator=(const ReceivedProfileCounter&) = delete;
  ~ReceivedProfileCounter() = delete;

  // Gets the counts of all successfully collected profiles, broken down by
  // process type and thread type. "Successfully collected" is defined pretty
  // minimally (we got a couple of frames). The result covers both profiles
  // that were already retrieved and profiles that are still pending in
  // PendingProfiles.
  CallStackProfileMetricsProvider::ProcessThreadCount
  GetSuccessfullyCollectedCounts();

  // Given a list of profiles returned from PendingProfiles::RetrieveProfiles(),
  // add counts from all the successful profiles in the list to our counts for
  // later.
  void OnRetrieveProfiles(const std::vector<SampledProfile>& profiles);

  // Allows testing against the initial state multiple times by clearing the
  // accumulated counts.
  void ResetToDefaultStateForTesting();  // IN-TEST

 private:
  // base::NoDestructor needs access to the private constructor.
  friend class base::NoDestructor<ReceivedProfileCounter>;

  ReceivedProfileCounter() = default;

  // Returns true if the given profile was success enough to be counted in
  // retrieved_successful_counts_: it has both a process and a thread, and at
  // least one of its stacks has two or more frames.
  static bool WasMinimallySuccessful(const SampledProfile& profile);

  // Guards |retrieved_successful_counts_|.
  mutable base::Lock lock_;

  // Count of successfully-stack-walked SampledProfiles retrieved since startup.
  // "success" is defined by WasMinimallySuccessful().
  CallStackProfileMetricsProvider::ProcessThreadCount
      retrieved_successful_counts_ GUARDED_BY(lock_);
};
299
300 // static
GetInstance()301 ReceivedProfileCounter* ReceivedProfileCounter::GetInstance() {
302 static base::NoDestructor<ReceivedProfileCounter> instance;
303 return instance.get();
304 }
305
306 // static
WasMinimallySuccessful(const SampledProfile & profile)307 bool ReceivedProfileCounter::WasMinimallySuccessful(
308 const SampledProfile& profile) {
309 // If we don't have a process or thread, we don't understand the profile.
310 if (!profile.has_process() || !profile.has_thread()) {
311 return false;
312 }
313
314 // Since we can't symbolize the stacks, "successful" here just means that the
315 // stack has at least 2 frames. (The current instruction pointer should always
316 // count as one, so two means we had some luck walking the stack.)
317 const auto& stacks = profile.call_stack_profile().stack();
318 return base::ranges::find_if(stacks,
319 [](const CallStackProfile::Stack& stack) {
320 return stack.frame_size() >= 2;
321 }) != stacks.end();
322 }
323
OnRetrieveProfiles(const std::vector<SampledProfile> & profiles)324 void ReceivedProfileCounter::OnRetrieveProfiles(
325 const std::vector<SampledProfile>& profiles) {
326 base::AutoLock scoped_lock(lock_);
327 for (const auto& profile : profiles) {
328 if (WasMinimallySuccessful(profile)) {
329 ++retrieved_successful_counts_[profile.process()][profile.thread()];
330 }
331 }
332 }
333
334 CallStackProfileMetricsProvider::ProcessThreadCount
GetSuccessfullyCollectedCounts()335 ReceivedProfileCounter::GetSuccessfullyCollectedCounts() {
336 CallStackProfileMetricsProvider::ProcessThreadCount successful_counts;
337
338 {
339 base::AutoLock scoped_lock(lock_);
340 // Start with count of profiles we've already sent
341 successful_counts = retrieved_successful_counts_;
342 }
343
344 // And then add in any pending ones. Copying and then deserializing all the
345 // profiles is expensive, but again, this should only be called during tast
346 // integration tests.
347 std::vector<std::string> unretrieved_profiles(
348 PendingProfiles::GetInstance()->GetUnretrievedProfiles());
349 for (const std::string& serialized_profile : unretrieved_profiles) {
350 SampledProfile profile;
351 if (profile.ParseFromString(serialized_profile)) {
352 if (WasMinimallySuccessful(profile)) {
353 ++successful_counts[profile.process()][profile.thread()];
354 }
355 }
356 }
357
358 return successful_counts;
359 }
360
// Clears the accumulated counts so that tests can start from the initial
// state again.
void ReceivedProfileCounter::ResetToDefaultStateForTesting() {
  base::AutoLock scoped_lock(lock_);
  retrieved_successful_counts_.clear();
}
365
366 #endif // BUILDFLAG(IS_CHROMEOS)
367 } // namespace
368
369 // CallStackProfileMetricsProvider --------------------------------------------
370
// Gates the reporting of sampling profiler profiles. Profiles triggered by
// PERIODIC_HEAP_COLLECTION (or flagged as heap profiles) are not subject to
// this feature in ReceiveProfile() / ReceiveSerializedProfile().
BASE_FEATURE(kSamplingProfilerReporting,
             "SamplingProfilerReporting",
             kSamplingProfilerReportingDefaultState);
374
// Construction and destruction are defaulted. The pending profiles are kept in
// the file-local PendingProfiles singleton rather than in the provider
// instance.
CallStackProfileMetricsProvider::CallStackProfileMetricsProvider() = default;
CallStackProfileMetricsProvider::~CallStackProfileMetricsProvider() = default;
377
378 // static
ReceiveProfile(base::TimeTicks profile_start_time,SampledProfile profile)379 void CallStackProfileMetricsProvider::ReceiveProfile(
380 base::TimeTicks profile_start_time,
381 SampledProfile profile) {
382 if (GetCpuInterceptorCallbackInstance() &&
383 (profile.trigger_event() == SampledProfile::PROCESS_STARTUP ||
384 profile.trigger_event() == SampledProfile::PERIODIC_COLLECTION)) {
385 GetCpuInterceptorCallbackInstance().Run(std::move(profile));
386 return;
387 }
388
389 if (profile.trigger_event() != SampledProfile::PERIODIC_HEAP_COLLECTION &&
390 !SamplingProfilerReportingEnabled()) {
391 return;
392 }
393 PendingProfiles::GetInstance()->MaybeCollectProfile(profile_start_time,
394 std::move(profile));
395 }
396
397 // static
ReceiveSerializedProfile(base::TimeTicks profile_start_time,bool is_heap_profile,std::string && serialized_profile)398 void CallStackProfileMetricsProvider::ReceiveSerializedProfile(
399 base::TimeTicks profile_start_time,
400 bool is_heap_profile,
401 std::string&& serialized_profile) {
402 // Note: All parameters of this function come from a Mojo message from an
403 // untrusted process.
404 if (GetCpuInterceptorCallbackInstance()) {
405 // GetCpuInterceptorCallbackInstance() is set only in tests, so it's safe to
406 // trust `is_heap_profile` and `serialized_profile` here.
407 DCHECK(!is_heap_profile);
408 SampledProfile profile;
409 if (profile.ParseFromString(serialized_profile)) {
410 DCHECK(profile.trigger_event() == SampledProfile::PROCESS_STARTUP ||
411 profile.trigger_event() == SampledProfile::PERIODIC_COLLECTION);
412 GetCpuInterceptorCallbackInstance().Run(std::move(profile));
413 }
414 return;
415 }
416
417 // If an attacker spoofs `is_heap_profile` or `profile_start_time`, the worst
418 // they can do is cause `serialized_profile` to be sent to UMA when profile
419 // reporting should be disabled.
420 if (!is_heap_profile && !SamplingProfilerReportingEnabled()) {
421 return;
422 }
423 PendingProfiles::GetInstance()->MaybeCollectSerializedProfile(
424 profile_start_time, std::move(serialized_profile));
425 }
426
427 // static
SetCpuInterceptorCallbackForTesting(InterceptorCallback callback)428 void CallStackProfileMetricsProvider::SetCpuInterceptorCallbackForTesting(
429 InterceptorCallback callback) {
430 GetCpuInterceptorCallbackInstance() = std::move(callback);
431 }
432
433 #if BUILDFLAG(IS_CHROMEOS)
434 // static
435 CallStackProfileMetricsProvider::ProcessThreadCount
GetSuccessfullyCollectedCounts()436 CallStackProfileMetricsProvider::GetSuccessfullyCollectedCounts() {
437 return ReceivedProfileCounter::GetInstance()
438 ->GetSuccessfullyCollectedCounts();
439 }
440 #endif
441
OnRecordingEnabled()442 void CallStackProfileMetricsProvider::OnRecordingEnabled() {
443 PendingProfiles::GetInstance()->SetCollectionEnabled(true);
444 }
445
OnRecordingDisabled()446 void CallStackProfileMetricsProvider::OnRecordingDisabled() {
447 PendingProfiles::GetInstance()->SetCollectionEnabled(false);
448 }
449
ProvideCurrentSessionData(ChromeUserMetricsExtension * uma_proto)450 void CallStackProfileMetricsProvider::ProvideCurrentSessionData(
451 ChromeUserMetricsExtension* uma_proto) {
452 std::vector<SampledProfile> profiles =
453 PendingProfiles::GetInstance()->RetrieveProfiles();
454 #if BUILDFLAG(IS_CHROMEOS)
455 ReceivedProfileCounter::GetInstance()->OnRetrieveProfiles(profiles);
456 #endif
457
458 for (auto& profile : profiles) {
459 // Only heap samples should ever be received if SamplingProfilerReporting is
460 // disabled.
461 DCHECK(SamplingProfilerReportingEnabled() ||
462 profile.trigger_event() == SampledProfile::PERIODIC_HEAP_COLLECTION);
463 *uma_proto->add_sampled_profile() = std::move(profile);
464 }
465 }
466
467 // static
ResetStaticStateForTesting()468 void CallStackProfileMetricsProvider::ResetStaticStateForTesting() {
469 PendingProfiles::GetInstance()->ResetToDefaultStateForTesting();
470 #if BUILDFLAG(IS_CHROMEOS)
471 ReceivedProfileCounter::GetInstance()
472 ->ResetToDefaultStateForTesting(); // IN-TEST
473 #endif
474 }
475
476 } // namespace metrics
477