1 // Copyright 2015 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/metrics/call_stacks/call_stack_profile_metrics_provider.h"
6 
7 #include <utility>
8 #include <vector>
9 
10 #include "base/check.h"
11 #include "base/feature_list.h"
12 #include "base/functional/bind.h"
13 #include "base/no_destructor.h"
14 #include "base/ranges/algorithm.h"
15 #include "base/synchronization/lock.h"
16 #include "base/thread_annotations.h"
17 #include "base/time/time.h"
18 #include "sampled_profile.pb.h"
19 #include "third_party/metrics_proto/chrome_user_metrics_extension.pb.h"
20 
21 namespace metrics {
22 
23 namespace {
24 
25 constexpr base::FeatureState kSamplingProfilerReportingDefaultState =
26     base::FEATURE_ENABLED_BY_DEFAULT;
27 
SamplingProfilerReportingEnabled()28 bool SamplingProfilerReportingEnabled() {
29   // TODO(crbug.com/40246378): Do not call this function before the FeatureList
30   // is registered.
31   if (!base::FeatureList::GetInstance()) {
32     // The FeatureList is not registered: use the feature's default state. This
33     // means that any override from the command line or variations service is
34     // ignored.
35     return kSamplingProfilerReportingDefaultState ==
36            base::FEATURE_ENABLED_BY_DEFAULT;
37   }
38   return base::FeatureList::IsEnabled(kSamplingProfilerReporting);
39 }
40 
// Upper bound on the number of profiles held while waiting to be uploaded.
// When uploads are delayed (e.g. because the client is offline), an unbounded
// backlog would make profile deserialization excessively expensive. Capping at
// this threshold loses approximately 0.5% of profiles on canary and dev.
//
// TODO(wittman): Remove this threshold after crbug.com/903972 is fixed.
constexpr size_t kMaxPendingProfiles = 1250;
48 
49 // Provides access to the singleton interceptor callback instance for CPU
50 // profiles. Accessed asynchronously on the profiling thread after profiling has
51 // been started.
52 CallStackProfileMetricsProvider::InterceptorCallback&
GetCpuInterceptorCallbackInstance()53 GetCpuInterceptorCallbackInstance() {
54   static base::NoDestructor<
55       CallStackProfileMetricsProvider::InterceptorCallback>
56       instance;
57   return *instance;
58 }
59 
60 // PendingProfiles ------------------------------------------------------------
61 
62 // Singleton class responsible for retaining profiles received from
63 // CallStackProfileBuilder. These are then sent to UMA on the invocation of
64 // CallStackProfileMetricsProvider::ProvideCurrentSessionData(). We need to
65 // store the profiles outside of a CallStackProfileMetricsProvider instance
66 // since callers may start profiling before the CallStackProfileMetricsProvider
67 // is created.
68 //
69 // Member functions on this class may be called on any thread.
70 class PendingProfiles {
71  public:
72   static PendingProfiles* GetInstance();
73 
74   PendingProfiles(const PendingProfiles&) = delete;
75   PendingProfiles& operator=(const PendingProfiles&) = delete;
76 
77   // Retrieves all the pending profiles.
78   std::vector<SampledProfile> RetrieveProfiles();
79 
80   // Enables the collection of profiles by MaybeCollect*Profile if |enabled| is
81   // true. Otherwise, clears the currently collected profiles and ignores
82   // profiles provided to future invocations of MaybeCollect*Profile.
83   void SetCollectionEnabled(bool enabled);
84 
85   // Collects |profile|. It may be stored in a serialized form, or ignored,
86   // depending on the pre-defined storage capacity and whether collection is
87   // enabled. |profile| is not const& because it must be passed with std::move.
88   void MaybeCollectProfile(base::TimeTicks profile_start_time,
89                            SampledProfile profile);
90 
91   // Collects |serialized_profile|. It may be ignored depending on the
92   // pre-defined storage capacity and whether collection is enabled.
93   // |serialized_profile| must be passed with std::move because it could be very
94   // large.
95   void MaybeCollectSerializedProfile(base::TimeTicks profile_start_time,
96                                      std::string&& serialized_profile);
97 
98 #if BUILDFLAG(IS_CHROMEOS)
99   // Returns all the serialized profiles that have been collected but not yet
100   // retrieved. For thread-safety reasons, returns a copy, so this is an
101   // expensive function. Fortunately, it's only called during ChromeOS tast
102   // integration tests.
GetUnretrievedProfiles()103   std::vector<std::string> GetUnretrievedProfiles() {
104     base::AutoLock scoped_lock(lock_);
105     return serialized_profiles_;
106   }
107 #endif  // BUILDFLAG(IS_CHROMEOS)
108 
109   // Allows testing against the initial state multiple times.
110   void ResetToDefaultStateForTesting();
111 
112  private:
113   friend class base::NoDestructor<PendingProfiles>;
114 
115   PendingProfiles();
116   ~PendingProfiles() = delete;
117 
118   // Returns true if collection is enabled for a given profile based on its
119   // |profile_start_time|. The |lock_| must be held prior to calling this
120   // method.
121   bool IsCollectionEnabledForProfile(base::TimeTicks profile_start_time) const
122       EXCLUSIVE_LOCKS_REQUIRED(lock_);
123 
124   mutable base::Lock lock_;
125 
126   // If true, profiles provided to MaybeCollect*Profile should be collected.
127   // Otherwise they will be ignored.
128   // |collection_enabled_| is initialized to true to collect any profiles that
129   // are generated prior to creation of the CallStackProfileMetricsProvider.
130   // The ultimate disposition of these pre-creation collected profiles will be
131   // determined by the initial recording state provided to
132   // CallStackProfileMetricsProvider.
133   bool collection_enabled_ GUARDED_BY(lock_) = true;
134 
135   // The last time collection was disabled. Used to determine if collection was
136   // disabled at any point since a profile was started.
137   base::TimeTicks last_collection_disable_time_ GUARDED_BY(lock_);
138 
139   // The last time collection was enabled. Used to determine if collection was
140   // enabled at any point since a profile was started.
141   base::TimeTicks last_collection_enable_time_ GUARDED_BY(lock_);
142 
143   // The set of completed serialized profiles that should be reported.
144   std::vector<std::string> serialized_profiles_ GUARDED_BY(lock_);
145 };
146 
147 // static
GetInstance()148 PendingProfiles* PendingProfiles::GetInstance() {
149   // Singleton for performance rather than correctness reasons.
150   static base::NoDestructor<PendingProfiles> instance;
151   return instance.get();
152 }
153 
RetrieveProfiles()154 std::vector<SampledProfile> PendingProfiles::RetrieveProfiles() {
155   std::vector<std::string> serialized_profiles;
156 
157   {
158     base::AutoLock scoped_lock(lock_);
159     serialized_profiles.swap(serialized_profiles_);
160   }
161 
162   // Deserialize all serialized profiles, skipping over any that fail to parse.
163   std::vector<SampledProfile> profiles;
164   profiles.reserve(serialized_profiles.size());
165   for (const auto& serialized_profile : serialized_profiles) {
166     SampledProfile profile;
167     if (profile.ParseFromString(serialized_profile)) {
168       profiles.push_back(std::move(profile));
169     }
170   }
171 
172   return profiles;
173 }
174 
SetCollectionEnabled(bool enabled)175 void PendingProfiles::SetCollectionEnabled(bool enabled) {
176   base::AutoLock scoped_lock(lock_);
177 
178   collection_enabled_ = enabled;
179 
180   if (!collection_enabled_) {
181     serialized_profiles_.clear();
182     last_collection_disable_time_ = base::TimeTicks::Now();
183   } else {
184     last_collection_enable_time_ = base::TimeTicks::Now();
185   }
186 }
187 
IsCollectionEnabledForProfile(base::TimeTicks profile_start_time) const188 bool PendingProfiles::IsCollectionEnabledForProfile(
189     base::TimeTicks profile_start_time) const {
190   lock_.AssertAcquired();
191 
192   // Scenario 1: return false if collection is disabled.
193   if (!collection_enabled_)
194     return false;
195 
196   // Scenario 2: return false if collection is disabled after the start of
197   // collection for this profile.
198   if (!last_collection_disable_time_.is_null() &&
199       last_collection_disable_time_ >= profile_start_time) {
200     return false;
201   }
202 
203   // Scenario 3: return false if collection is disabled before the start of
204   // collection and re-enabled after the start. Note that this is different from
205   // scenario 1 where re-enabling never happens.
206   if (!last_collection_disable_time_.is_null() &&
207       !last_collection_enable_time_.is_null() &&
208       last_collection_enable_time_ >= profile_start_time) {
209     return false;
210   }
211 
212   return true;
213 }
214 
MaybeCollectProfile(base::TimeTicks profile_start_time,SampledProfile profile)215 void PendingProfiles::MaybeCollectProfile(base::TimeTicks profile_start_time,
216                                           SampledProfile profile) {
217   {
218     base::AutoLock scoped_lock(lock_);
219 
220     if (!IsCollectionEnabledForProfile(profile_start_time))
221       return;
222   }
223 
224   // Serialize the profile without holding the lock.
225   std::string serialized_profile;
226   profile.SerializeToString(&serialized_profile);
227 
228   MaybeCollectSerializedProfile(profile_start_time,
229                                 std::move(serialized_profile));
230 }
231 
MaybeCollectSerializedProfile(base::TimeTicks profile_start_time,std::string && serialized_profile)232 void PendingProfiles::MaybeCollectSerializedProfile(
233     base::TimeTicks profile_start_time,
234     std::string&& serialized_profile) {
235   base::AutoLock scoped_lock(lock_);
236 
237   // There is no room for additional profiles.
238   if (serialized_profiles_.size() >= kMaxPendingProfiles)
239     return;
240 
241   if (IsCollectionEnabledForProfile(profile_start_time))
242     serialized_profiles_.push_back(std::move(serialized_profile));
243 }
244 
ResetToDefaultStateForTesting()245 void PendingProfiles::ResetToDefaultStateForTesting() {
246   base::AutoLock scoped_lock(lock_);
247 
248   collection_enabled_ = true;
249   last_collection_disable_time_ = base::TimeTicks();
250   last_collection_enable_time_ = base::TimeTicks();
251   serialized_profiles_.clear();
252 }
253 
254 PendingProfiles::PendingProfiles() = default;
255 
256 #if BUILDFLAG(IS_CHROMEOS)
257 // A class that records the number of minimally-successful profiles received
258 // over time. In ChromeOS, this is used by the ui.StackSampledMetrics tast
259 // integration test to confirm that stack-sampled metrics are working on
260 // all the various ChromeOS boards.
261 class ReceivedProfileCounter {
262  public:
263   static ReceivedProfileCounter* GetInstance();
264 
265   ReceivedProfileCounter(const ReceivedProfileCounter&) = delete;
266   ReceivedProfileCounter& operator=(const ReceivedProfileCounter&) = delete;
267   ~ReceivedProfileCounter() = delete;
268 
269   // Gets the counts of all successfully collected profiles, broken down by
270   // process type and thread type. "Successfully collected" is defined pretty
271   // minimally (we got a couple of frames).
272   CallStackProfileMetricsProvider::ProcessThreadCount
273   GetSuccessfullyCollectedCounts();
274 
275   // Given a list of profiles returned from PendingProfiles::RetrieveProfiles(),
276   // add counts from all the successful profiles in the list to our counts for
277   // later.
278   void OnRetrieveProfiles(const std::vector<SampledProfile>& profiles);
279 
280   // Allows testing against the initial state multiple times.
281   void ResetToDefaultStateForTesting();  // IN-TEST
282 
283  private:
284   friend class base::NoDestructor<ReceivedProfileCounter>;
285 
286   ReceivedProfileCounter() = default;
287 
288   // Returns true if the given profile was success enough to be counted in
289   // retrieved_successful_counts_.
290   static bool WasMinimallySuccessful(const SampledProfile& profile);
291 
292   mutable base::Lock lock_;
293 
294   // Count of successfully-stack-walked SampledProfiles retrieved since startup.
295   // "success" is defined by WasMinimallySuccessful().
296   CallStackProfileMetricsProvider::ProcessThreadCount
297       retrieved_successful_counts_ GUARDED_BY(lock_);
298 };
299 
300 // static
GetInstance()301 ReceivedProfileCounter* ReceivedProfileCounter::GetInstance() {
302   static base::NoDestructor<ReceivedProfileCounter> instance;
303   return instance.get();
304 }
305 
306 // static
WasMinimallySuccessful(const SampledProfile & profile)307 bool ReceivedProfileCounter::WasMinimallySuccessful(
308     const SampledProfile& profile) {
309   // If we don't have a process or thread, we don't understand the profile.
310   if (!profile.has_process() || !profile.has_thread()) {
311     return false;
312   }
313 
314   // Since we can't symbolize the stacks, "successful" here just means that the
315   // stack has at least 2 frames. (The current instruction pointer should always
316   // count as one, so two means we had some luck walking the stack.)
317   const auto& stacks = profile.call_stack_profile().stack();
318   return base::ranges::find_if(stacks,
319                                [](const CallStackProfile::Stack& stack) {
320                                  return stack.frame_size() >= 2;
321                                }) != stacks.end();
322 }
323 
OnRetrieveProfiles(const std::vector<SampledProfile> & profiles)324 void ReceivedProfileCounter::OnRetrieveProfiles(
325     const std::vector<SampledProfile>& profiles) {
326   base::AutoLock scoped_lock(lock_);
327   for (const auto& profile : profiles) {
328     if (WasMinimallySuccessful(profile)) {
329       ++retrieved_successful_counts_[profile.process()][profile.thread()];
330     }
331   }
332 }
333 
334 CallStackProfileMetricsProvider::ProcessThreadCount
GetSuccessfullyCollectedCounts()335 ReceivedProfileCounter::GetSuccessfullyCollectedCounts() {
336   CallStackProfileMetricsProvider::ProcessThreadCount successful_counts;
337 
338   {
339     base::AutoLock scoped_lock(lock_);
340     // Start with count of profiles we've already sent
341     successful_counts = retrieved_successful_counts_;
342   }
343 
344   // And then add in any pending ones. Copying and then deserializing all the
345   // profiles is expensive, but again, this should only be called during tast
346   // integration tests.
347   std::vector<std::string> unretrieved_profiles(
348       PendingProfiles::GetInstance()->GetUnretrievedProfiles());
349   for (const std::string& serialized_profile : unretrieved_profiles) {
350     SampledProfile profile;
351     if (profile.ParseFromString(serialized_profile)) {
352       if (WasMinimallySuccessful(profile)) {
353         ++successful_counts[profile.process()][profile.thread()];
354       }
355     }
356   }
357 
358   return successful_counts;
359 }
360 
ResetToDefaultStateForTesting()361 void ReceivedProfileCounter::ResetToDefaultStateForTesting() {
362   base::AutoLock scoped_lock(lock_);
363   retrieved_successful_counts_.clear();
364 }
365 
366 #endif  // BUILDFLAG(IS_CHROMEOS)
367 }  // namespace
368 
369 // CallStackProfileMetricsProvider --------------------------------------------
370 
371 BASE_FEATURE(kSamplingProfilerReporting,
372              "SamplingProfilerReporting",
373              kSamplingProfilerReportingDefaultState);
374 
375 CallStackProfileMetricsProvider::CallStackProfileMetricsProvider() = default;
376 CallStackProfileMetricsProvider::~CallStackProfileMetricsProvider() = default;
377 
378 // static
ReceiveProfile(base::TimeTicks profile_start_time,SampledProfile profile)379 void CallStackProfileMetricsProvider::ReceiveProfile(
380     base::TimeTicks profile_start_time,
381     SampledProfile profile) {
382   if (GetCpuInterceptorCallbackInstance() &&
383       (profile.trigger_event() == SampledProfile::PROCESS_STARTUP ||
384        profile.trigger_event() == SampledProfile::PERIODIC_COLLECTION)) {
385     GetCpuInterceptorCallbackInstance().Run(std::move(profile));
386     return;
387   }
388 
389   if (profile.trigger_event() != SampledProfile::PERIODIC_HEAP_COLLECTION &&
390       !SamplingProfilerReportingEnabled()) {
391     return;
392   }
393   PendingProfiles::GetInstance()->MaybeCollectProfile(profile_start_time,
394                                                       std::move(profile));
395 }
396 
397 // static
ReceiveSerializedProfile(base::TimeTicks profile_start_time,bool is_heap_profile,std::string && serialized_profile)398 void CallStackProfileMetricsProvider::ReceiveSerializedProfile(
399     base::TimeTicks profile_start_time,
400     bool is_heap_profile,
401     std::string&& serialized_profile) {
402   // Note: All parameters of this function come from a Mojo message from an
403   // untrusted process.
404   if (GetCpuInterceptorCallbackInstance()) {
405     // GetCpuInterceptorCallbackInstance() is set only in tests, so it's safe to
406     // trust `is_heap_profile` and `serialized_profile` here.
407     DCHECK(!is_heap_profile);
408     SampledProfile profile;
409     if (profile.ParseFromString(serialized_profile)) {
410       DCHECK(profile.trigger_event() == SampledProfile::PROCESS_STARTUP ||
411              profile.trigger_event() == SampledProfile::PERIODIC_COLLECTION);
412       GetCpuInterceptorCallbackInstance().Run(std::move(profile));
413     }
414     return;
415   }
416 
417   // If an attacker spoofs `is_heap_profile` or `profile_start_time`, the worst
418   // they can do is cause `serialized_profile` to be sent to UMA when profile
419   // reporting should be disabled.
420   if (!is_heap_profile && !SamplingProfilerReportingEnabled()) {
421     return;
422   }
423   PendingProfiles::GetInstance()->MaybeCollectSerializedProfile(
424       profile_start_time, std::move(serialized_profile));
425 }
426 
427 // static
SetCpuInterceptorCallbackForTesting(InterceptorCallback callback)428 void CallStackProfileMetricsProvider::SetCpuInterceptorCallbackForTesting(
429     InterceptorCallback callback) {
430   GetCpuInterceptorCallbackInstance() = std::move(callback);
431 }
432 
433 #if BUILDFLAG(IS_CHROMEOS)
434 // static
435 CallStackProfileMetricsProvider::ProcessThreadCount
GetSuccessfullyCollectedCounts()436 CallStackProfileMetricsProvider::GetSuccessfullyCollectedCounts() {
437   return ReceivedProfileCounter::GetInstance()
438       ->GetSuccessfullyCollectedCounts();
439 }
440 #endif
441 
OnRecordingEnabled()442 void CallStackProfileMetricsProvider::OnRecordingEnabled() {
443   PendingProfiles::GetInstance()->SetCollectionEnabled(true);
444 }
445 
OnRecordingDisabled()446 void CallStackProfileMetricsProvider::OnRecordingDisabled() {
447   PendingProfiles::GetInstance()->SetCollectionEnabled(false);
448 }
449 
ProvideCurrentSessionData(ChromeUserMetricsExtension * uma_proto)450 void CallStackProfileMetricsProvider::ProvideCurrentSessionData(
451     ChromeUserMetricsExtension* uma_proto) {
452   std::vector<SampledProfile> profiles =
453       PendingProfiles::GetInstance()->RetrieveProfiles();
454 #if BUILDFLAG(IS_CHROMEOS)
455   ReceivedProfileCounter::GetInstance()->OnRetrieveProfiles(profiles);
456 #endif
457 
458   for (auto& profile : profiles) {
459     // Only heap samples should ever be received if SamplingProfilerReporting is
460     // disabled.
461     DCHECK(SamplingProfilerReportingEnabled() ||
462            profile.trigger_event() == SampledProfile::PERIODIC_HEAP_COLLECTION);
463     *uma_proto->add_sampled_profile() = std::move(profile);
464   }
465 }
466 
467 // static
ResetStaticStateForTesting()468 void CallStackProfileMetricsProvider::ResetStaticStateForTesting() {
469   PendingProfiles::GetInstance()->ResetToDefaultStateForTesting();
470 #if BUILDFLAG(IS_CHROMEOS)
471   ReceivedProfileCounter::GetInstance()
472       ->ResetToDefaultStateForTesting();  // IN-TEST
473 #endif
474 }
475 
476 }  // namespace metrics
477