// Copyright 2020 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/threading/hang_watcher.h"

#include <atomic>
#include <utility>

#include "base/containers/flat_map.h"
#include "base/debug/alias.h"
#include "base/debug/crash_logging.h"
#include "base/debug/dump_without_crashing.h"
#include "base/debug/leak_annotations.h"
#include "base/feature_list.h"
#include "base/functional/bind.h"
#include "base/functional/callback_helpers.h"
#include "base/metrics/field_trial_params.h"
#include "base/metrics/histogram_macros.h"
#include "base/power_monitor/power_monitor.h"
#include "base/ranges/algorithm.h"
#include "base/strings/string_number_conversions.h"
#include "base/synchronization/lock.h"
#include "base/synchronization/waitable_event.h"
#include "base/threading/platform_thread.h"
#include "base/threading/thread_checker.h"
#include "base/threading/thread_restrictions.h"
#include "base/threading/threading_features.h"
#include "base/time/default_tick_clock.h"
#include "base/time/time.h"
#include "base/trace_event/base_tracing.h"
#include "build/build_config.h"
#include "third_party/abseil-cpp/absl/base/attributes.h"

namespace base {

namespace {

// Defines how much logging happens when the HangWatcher monitors the threads.
// Logging levels are set per thread type through Finch. It's important that
// the order of the enum members stay the same and that their numerical
// values be in increasing order. The implementation of
// ThreadTypeLoggingLevelGreaterOrEqual() depends on it.
enum class LoggingLevel { kNone = 0, kUmaOnly = 1, kUmaAndCrash = 2 };
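
// Added illustrative checks (not in the original file): because the enum
// values are strictly increasing, scoped-enum comparisons implement the
// "greater or equal logging level" test directly.
static_assert(LoggingLevel::kUmaAndCrash > LoggingLevel::kUmaOnly,
              "Enum values must be in increasing order of verbosity.");
static_assert(LoggingLevel::kUmaOnly > LoggingLevel::kNone,
              "Enum values must be in increasing order of verbosity.");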

HangWatcher* g_instance = nullptr;
ABSL_CONST_INIT thread_local internal::HangWatchState* hang_watch_state =
    nullptr;
std::atomic<bool> g_use_hang_watcher{false};
std::atomic<HangWatcher::ProcessType> g_hang_watcher_process_type{
    HangWatcher::ProcessType::kBrowserProcess};

std::atomic<LoggingLevel> g_threadpool_log_level{LoggingLevel::kNone};
std::atomic<LoggingLevel> g_io_thread_log_level{LoggingLevel::kNone};
std::atomic<LoggingLevel> g_main_thread_log_level{LoggingLevel::kNone};

// Indicates whether HangWatcher::Run() should return after the next monitoring.
std::atomic<bool> g_keep_monitoring{true};

// Emits the hung thread count histogram. |count| is the number of threads
// of type |thread_type| that were hung or became hung during the last
// monitoring window. This function should be invoked for each thread type
// encountered on each call to Monitor().
void LogHungThreadCountHistogram(HangWatcher::ThreadType thread_type,
                                 int count) {
  // In the case of unique threads like the IO or UI/Main thread a count does
  // not make sense.
  const bool any_thread_hung = count >= 1;

  const HangWatcher::ProcessType process_type =
      g_hang_watcher_process_type.load(std::memory_order_relaxed);
  switch (process_type) {
    case HangWatcher::ProcessType::kUnknownProcess:
      break;

    case HangWatcher::ProcessType::kBrowserProcess:
      switch (thread_type) {
        case HangWatcher::ThreadType::kIOThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.BrowserProcess."
              "IOThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kMainThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.BrowserProcess."
              "UIThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kThreadPoolThread:
          // Not recorded for now.
          break;
      }
      break;

    case HangWatcher::ProcessType::kGPUProcess:
      // Not recorded for now.
      break;

    case HangWatcher::ProcessType::kRendererProcess:
      switch (thread_type) {
        case HangWatcher::ThreadType::kIOThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.RendererProcess."
              "IOThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kMainThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.RendererProcess."
              "MainThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kThreadPoolThread:
          // Not recorded for now.
          break;
      }
      break;

    case HangWatcher::ProcessType::kUtilityProcess:
      switch (thread_type) {
        case HangWatcher::ThreadType::kIOThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.UtilityProcess."
              "IOThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kMainThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.UtilityProcess."
              "MainThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kThreadPoolThread:
          // Not recorded for now.
          break;
      }
      break;
  }
}
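
// Added note (illustrative, derived from the switch above): the recorded
// histogram names follow the pattern
// "HangWatcher.IsThreadHung.<ProcessType>.<ThreadType>", e.g.
// "HangWatcher.IsThreadHung.BrowserProcess.UIThread". The boolean sample is
// true when at least one thread of that type was hung during the monitoring
// window.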

// Returns true if |thread_type| was configured through Finch to have a logging
// level that is equal to or exceeds |logging_level|.
bool ThreadTypeLoggingLevelGreaterOrEqual(HangWatcher::ThreadType thread_type,
                                          LoggingLevel logging_level) {
  switch (thread_type) {
    case HangWatcher::ThreadType::kIOThread:
      return g_io_thread_log_level.load(std::memory_order_relaxed) >=
             logging_level;
    case HangWatcher::ThreadType::kMainThread:
      return g_main_thread_log_level.load(std::memory_order_relaxed) >=
             logging_level;
    case HangWatcher::ThreadType::kThreadPoolThread:
      return g_threadpool_log_level.load(std::memory_order_relaxed) >=
             logging_level;
  }
}

}  // namespace

// Determines if the HangWatcher is activated. When false, the HangWatcher
// thread is never started.
BASE_FEATURE(kEnableHangWatcher,
             "EnableHangWatcher",
             FEATURE_ENABLED_BY_DEFAULT);

BASE_FEATURE(kEnableHangWatcherInZygoteChildren,
             "EnableHangWatcherInZygoteChildren",
             FEATURE_ENABLED_BY_DEFAULT);

// Browser process.
constexpr base::FeatureParam<int> kIOThreadLogLevel{
    &kEnableHangWatcher, "io_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kUIThreadLogLevel{
    &kEnableHangWatcher, "ui_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kThreadPoolLogLevel{
    &kEnableHangWatcher, "threadpool_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};

// GPU process.
constexpr base::FeatureParam<int> kGPUProcessIOThreadLogLevel{
    &kEnableHangWatcher, "gpu_process_io_thread_log_level",
    static_cast<int>(LoggingLevel::kNone)};
constexpr base::FeatureParam<int> kGPUProcessMainThreadLogLevel{
    &kEnableHangWatcher, "gpu_process_main_thread_log_level",
    static_cast<int>(LoggingLevel::kNone)};
constexpr base::FeatureParam<int> kGPUProcessThreadPoolLogLevel{
    &kEnableHangWatcher, "gpu_process_threadpool_log_level",
    static_cast<int>(LoggingLevel::kNone)};

// Renderer process.
constexpr base::FeatureParam<int> kRendererProcessIOThreadLogLevel{
    &kEnableHangWatcher, "renderer_process_io_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kRendererProcessMainThreadLogLevel{
    &kEnableHangWatcher, "renderer_process_main_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kRendererProcessThreadPoolLogLevel{
    &kEnableHangWatcher, "renderer_process_threadpool_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};

// Utility process.
constexpr base::FeatureParam<int> kUtilityProcessIOThreadLogLevel{
    &kEnableHangWatcher, "utility_process_io_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kUtilityProcessMainThreadLogLevel{
    &kEnableHangWatcher, "utility_process_main_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kUtilityProcessThreadPoolLogLevel{
    &kEnableHangWatcher, "utility_process_threadpool_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};

constexpr const char* kThreadName = "HangWatcher";

// The time that the HangWatcher thread will sleep for between calls to
// Monitor(). Increasing or decreasing this does not modify the type of hangs
// that can be detected. It instead increases the probability that a call to
// Monitor() will happen at the right time to catch a hang. This has to be
// balanced with power/cpu use concerns as busy looping would catch almost all
// hangs but present unacceptable overhead. NOTE: If this period is ever
// changed then all metrics that depend on it, like HangWatcher.IsThreadHung,
// need to be updated.
constexpr auto kMonitoringPeriod = base::Seconds(10);
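
// Added back-of-the-envelope sketch (not from the original file): with
// monitoring period P and a hang lasting D, a Monitor() call lands inside the
// hang with probability of roughly min(D / P, 1). For P = 10s, a 5s hang is
// observed about half the time, while hangs of 10s or longer are almost
// always observed.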

WatchHangsInScope::WatchHangsInScope(TimeDelta timeout) {
  internal::HangWatchState* current_hang_watch_state =
      HangWatcher::IsEnabled()
          ? internal::HangWatchState::GetHangWatchStateForCurrentThread()
          : nullptr;

  DCHECK(timeout >= base::TimeDelta()) << "Negative timeouts are invalid.";

  // Thread is not monitored, no-op.
  if (!current_hang_watch_state) {
    took_effect_ = false;
    return;
  }

#if DCHECK_IS_ON()
  previous_watch_hangs_in_scope_ =
      current_hang_watch_state->GetCurrentWatchHangsInScope();
  current_hang_watch_state->SetCurrentWatchHangsInScope(this);
#endif

  auto [old_flags, old_deadline] =
      current_hang_watch_state->GetFlagsAndDeadline();

  // TODO(crbug.com/1034046): Check whether we are over deadline already for
  // the previous WatchHangsInScope here by issuing only one TimeTicks::Now()
  // and reusing the value.

  previous_deadline_ = old_deadline;
  TimeTicks deadline = TimeTicks::Now() + timeout;
  current_hang_watch_state->SetDeadline(deadline);
  current_hang_watch_state->IncrementNestingLevel();

  const bool hangs_ignored_for_current_scope =
      internal::HangWatchDeadline::IsFlagSet(
          internal::HangWatchDeadline::Flag::kIgnoreCurrentWatchHangsInScope,
          old_flags);

  // If the current WatchHangsInScope is ignored, temporarily reactivate hang
  // watching for newly created WatchHangsInScopes. On exiting, hang watching
  // is suspended again to return to the original state.
  if (hangs_ignored_for_current_scope) {
    current_hang_watch_state->UnsetIgnoreCurrentWatchHangsInScope();
    set_hangs_ignored_on_exit_ = true;
  }
}
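
// Added usage sketch (illustrative; DoWork() is a hypothetical function, not
// part of this file): on a thread registered with HangWatcher, a scope sets a
// deadline that the HangWatcher thread checks on its next Monitor() pass.
//
//   {
//     WatchHangsInScope scope(base::Seconds(10));
//     DoWork();  // Reported as hung if still running past the deadline.
//   }  // The destructor restores the previous deadline and nesting level.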

WatchHangsInScope::~WatchHangsInScope() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);

  // If hang watching was not enabled at construction time there is nothing to
  // validate or undo.
  if (!took_effect_) {
    return;
  }

  // If the thread was unregistered since construction there is also nothing to
  // do.
  auto* const state =
      internal::HangWatchState::GetHangWatchStateForCurrentThread();
  if (!state) {
    return;
  }

  // If a hang is currently being captured we should block here so execution
  // stops and we avoid recording unrelated stack frames in the crash.
  if (state->IsFlagSet(internal::HangWatchDeadline::Flag::kShouldBlockOnHang)) {
    base::HangWatcher::GetInstance()->BlockIfCaptureInProgress();
  }

#if DCHECK_IS_ON()
  // Verify that no Scope was destructed out of order.
  DCHECK_EQ(this, state->GetCurrentWatchHangsInScope());
  state->SetCurrentWatchHangsInScope(previous_watch_hangs_in_scope_);
#endif

  if (state->nesting_level() == 1) {
    // If a call to InvalidateActiveExpectations() suspended hang watching
    // during the lifetime of this or any nested WatchHangsInScope it can now
    // safely be reactivated by clearing the ignore bit since this is the
    // outer-most scope.
    state->UnsetIgnoreCurrentWatchHangsInScope();
  } else if (set_hangs_ignored_on_exit_) {
    // Return to ignoring hangs since this was the previous state before hang
    // watching was temporarily enabled for this WatchHangsInScope only in the
    // constructor.
    state->SetIgnoreCurrentWatchHangsInScope();
  }

  // Reset the deadline to the value it had before entering this
  // WatchHangsInScope.
  state->SetDeadline(previous_deadline_);
  // TODO(crbug.com/1034046): Log when a WatchHangsInScope exits after its
  // deadline and that went undetected by the HangWatcher.

  state->DecrementNestingLevel();
}

// static
void HangWatcher::InitializeOnMainThread(ProcessType process_type,
                                         bool is_zygote_child,
                                         bool emit_crashes) {
  DCHECK(!g_use_hang_watcher);
  DCHECK(g_io_thread_log_level == LoggingLevel::kNone);
  DCHECK(g_main_thread_log_level == LoggingLevel::kNone);
  DCHECK(g_threadpool_log_level == LoggingLevel::kNone);

  bool enable_hang_watcher = base::FeatureList::IsEnabled(kEnableHangWatcher);

#if BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_CHROMEOS)
  if (is_zygote_child) {
    enable_hang_watcher =
        enable_hang_watcher &&
        base::FeatureList::IsEnabled(kEnableHangWatcherInZygoteChildren);
  }
#endif

  // Do not start HangWatcher in the GPU process until the issue related to
  // invalid magic signature in the GPU WatchDog is fixed
  // (https://crbug.com/1297760).
  if (process_type == ProcessType::kGPUProcess)
    enable_hang_watcher = false;

  g_use_hang_watcher.store(enable_hang_watcher, std::memory_order_relaxed);

  // Keep the process type.
  g_hang_watcher_process_type.store(process_type, std::memory_order_relaxed);

  // If hang watching is disabled as a whole there is no need to read the
  // params.
  if (!enable_hang_watcher)
    return;

  // Retrieve thread-specific config for hang watching.
  if (process_type == HangWatcher::ProcessType::kBrowserProcess) {
    // Crashes are set to always emit. Override any feature flags.
    if (emit_crashes) {
      g_io_thread_log_level.store(
          static_cast<LoggingLevel>(LoggingLevel::kUmaAndCrash),
          std::memory_order_relaxed);
      g_main_thread_log_level.store(
          static_cast<LoggingLevel>(LoggingLevel::kUmaAndCrash),
          std::memory_order_relaxed);
    } else {
      g_io_thread_log_level.store(
          static_cast<LoggingLevel>(kIOThreadLogLevel.Get()),
          std::memory_order_relaxed);
      g_main_thread_log_level.store(
          static_cast<LoggingLevel>(kUIThreadLogLevel.Get()),
          std::memory_order_relaxed);
    }

    g_threadpool_log_level.store(
        static_cast<LoggingLevel>(kThreadPoolLogLevel.Get()),
        std::memory_order_relaxed);
  } else if (process_type == HangWatcher::ProcessType::kGPUProcess) {
    g_threadpool_log_level.store(
        static_cast<LoggingLevel>(kGPUProcessThreadPoolLogLevel.Get()),
        std::memory_order_relaxed);
    g_io_thread_log_level.store(
        static_cast<LoggingLevel>(kGPUProcessIOThreadLogLevel.Get()),
        std::memory_order_relaxed);
    g_main_thread_log_level.store(
        static_cast<LoggingLevel>(kGPUProcessMainThreadLogLevel.Get()),
        std::memory_order_relaxed);
  } else if (process_type == HangWatcher::ProcessType::kRendererProcess) {
    g_threadpool_log_level.store(
        static_cast<LoggingLevel>(kRendererProcessThreadPoolLogLevel.Get()),
        std::memory_order_relaxed);
    g_io_thread_log_level.store(
        static_cast<LoggingLevel>(kRendererProcessIOThreadLogLevel.Get()),
        std::memory_order_relaxed);
    g_main_thread_log_level.store(
        static_cast<LoggingLevel>(kRendererProcessMainThreadLogLevel.Get()),
        std::memory_order_relaxed);
  } else if (process_type == HangWatcher::ProcessType::kUtilityProcess) {
    g_threadpool_log_level.store(
        static_cast<LoggingLevel>(kUtilityProcessThreadPoolLogLevel.Get()),
        std::memory_order_relaxed);
    g_io_thread_log_level.store(
        static_cast<LoggingLevel>(kUtilityProcessIOThreadLogLevel.Get()),
        std::memory_order_relaxed);
    g_main_thread_log_level.store(
        static_cast<LoggingLevel>(kUtilityProcessMainThreadLogLevel.Get()),
        std::memory_order_relaxed);
  }
}

void HangWatcher::UnitializeOnMainThreadForTesting() {
  g_use_hang_watcher.store(false, std::memory_order_relaxed);
  g_threadpool_log_level.store(LoggingLevel::kNone, std::memory_order_relaxed);
  g_io_thread_log_level.store(LoggingLevel::kNone, std::memory_order_relaxed);
  g_main_thread_log_level.store(LoggingLevel::kNone, std::memory_order_relaxed);
}

// static
bool HangWatcher::IsEnabled() {
  return g_use_hang_watcher.load(std::memory_order_relaxed);
}

// static
bool HangWatcher::IsThreadPoolHangWatchingEnabled() {
  return g_threadpool_log_level.load(std::memory_order_relaxed) !=
         LoggingLevel::kNone;
}

// static
bool HangWatcher::IsIOThreadHangWatchingEnabled() {
  return g_io_thread_log_level.load(std::memory_order_relaxed) !=
         LoggingLevel::kNone;
}

// static
bool HangWatcher::IsCrashReportingEnabled() {
  if (g_main_thread_log_level.load(std::memory_order_relaxed) ==
      LoggingLevel::kUmaAndCrash) {
    return true;
  }
  if (g_io_thread_log_level.load(std::memory_order_relaxed) ==
      LoggingLevel::kUmaAndCrash) {
    return true;
  }
  if (g_threadpool_log_level.load(std::memory_order_relaxed) ==
      LoggingLevel::kUmaAndCrash) {
    return true;
  }
  return false;
}

// static
void HangWatcher::InvalidateActiveExpectations() {
  auto* const state =
      internal::HangWatchState::GetHangWatchStateForCurrentThread();
  if (!state) {
    // If the current thread is not under watch there is nothing to invalidate.
    return;
  }
  state->SetIgnoreCurrentWatchHangsInScope();
}
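
// Added usage sketch (illustrative; |event| is a hypothetical
// WaitableEvent, not part of this file): a thread about to enter a
// legitimately unbounded wait can call this to keep the enclosing
// WatchHangsInScopes from being reported as hangs.
//
//   base::HangWatcher::InvalidateActiveExpectations();
//   event.Wait();  // No longer reported as a hang by the active scopes.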

HangWatcher::HangWatcher()
    : monitor_period_(kMonitoringPeriod),
      should_monitor_(WaitableEvent::ResetPolicy::AUTOMATIC),
      thread_(this, kThreadName),
      tick_clock_(base::DefaultTickClock::GetInstance()),
      memory_pressure_listener_(
          FROM_HERE,
          base::BindRepeating(&HangWatcher::OnMemoryPressure,
                              base::Unretained(this))) {
  // |thread_checker_| should not be bound to the constructing thread.
  DETACH_FROM_THREAD(hang_watcher_thread_checker_);

  should_monitor_.declare_only_used_while_idle();

  DCHECK(!g_instance);
  g_instance = this;
}

// static
void HangWatcher::CreateHangWatcherInstance() {
  DCHECK(!g_instance);
  g_instance = new base::HangWatcher();
  // The hang watcher is leaked to make sure it survives all watched threads.
  ANNOTATE_LEAKING_OBJECT_PTR(g_instance);
}

#if !BUILDFLAG(IS_NACL)
debug::ScopedCrashKeyString
HangWatcher::GetTimeSinceLastCriticalMemoryPressureCrashKey() {
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);

  // The crash key size is large enough to hold the biggest possible return
  // value from base::TimeDelta::InSeconds().
  constexpr debug::CrashKeySize kCrashKeyContentSize =
      debug::CrashKeySize::Size32;
  DCHECK_GE(static_cast<uint64_t>(kCrashKeyContentSize),
            base::NumberToString(std::numeric_limits<int64_t>::max()).size());

  static debug::CrashKeyString* crash_key = AllocateCrashKeyString(
      "seconds-since-last-memory-pressure", kCrashKeyContentSize);

  const base::TimeTicks last_critical_memory_pressure_time =
      last_critical_memory_pressure_.load(std::memory_order_relaxed);
  if (last_critical_memory_pressure_time.is_null()) {
    constexpr char kNoMemoryPressureMsg[] = "No critical memory pressure";
    static_assert(
        std::size(kNoMemoryPressureMsg) <=
            static_cast<uint64_t>(kCrashKeyContentSize),
        "The crash key is too small to hold \"No critical memory pressure\".");
    return debug::ScopedCrashKeyString(crash_key, kNoMemoryPressureMsg);
  } else {
    base::TimeDelta time_since_last_critical_memory_pressure =
        base::TimeTicks::Now() - last_critical_memory_pressure_time;
    return debug::ScopedCrashKeyString(
        crash_key, base::NumberToString(
                       time_since_last_critical_memory_pressure.InSeconds()));
  }
}
#endif

std::string HangWatcher::GetTimeSinceLastSystemPowerResumeCrashKeyValue()
    const {
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);

  const TimeTicks last_system_power_resume_time =
      PowerMonitor::GetLastSystemResumeTime();
  if (last_system_power_resume_time.is_null())
    return "Never suspended";
  if (last_system_power_resume_time == TimeTicks::Max())
    return "Power suspended";

  const TimeDelta time_since_last_system_resume =
      TimeTicks::Now() - last_system_power_resume_time;
  return NumberToString(time_since_last_system_resume.InSeconds());
}

void HangWatcher::OnMemoryPressure(
    base::MemoryPressureListener::MemoryPressureLevel memory_pressure_level) {
  if (memory_pressure_level ==
      base::MemoryPressureListener::MEMORY_PRESSURE_LEVEL_CRITICAL) {
    last_critical_memory_pressure_.store(base::TimeTicks::Now(),
                                         std::memory_order_relaxed);
  }
}

HangWatcher::~HangWatcher() {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  DCHECK_EQ(g_instance, this);
  DCHECK(watch_states_.empty());
  g_instance = nullptr;
  Stop();
}

void HangWatcher::Start() {
  thread_.Start();
  thread_started_ = true;
}

void HangWatcher::Stop() {
  g_keep_monitoring.store(false, std::memory_order_relaxed);
  should_monitor_.Signal();
  thread_.Join();
  thread_started_ = false;

  // In production HangWatcher is always leaked but during testing it may be
  // stopped and restarted using a new instance. This makes sure the next call
  // to Start() will actually monitor in that case.
  g_keep_monitoring.store(true, std::memory_order_relaxed);
}

bool HangWatcher::IsWatchListEmpty() {
  AutoLock auto_lock(watch_state_lock_);
  return watch_states_.empty();
}

void HangWatcher::Wait() {
  while (true) {
    // Amount by which the actual time spent sleeping can deviate from
    // the target time and still be considered timely.
    constexpr base::TimeDelta kWaitDriftTolerance = base::Milliseconds(100);

    const base::TimeTicks time_before_wait = tick_clock_->NowTicks();

    // Sleep until next scheduled monitoring or until signaled.
    const bool was_signaled = should_monitor_.TimedWait(monitor_period_);

    if (after_wait_callback_)
      after_wait_callback_.Run(time_before_wait);

    const base::TimeTicks time_after_wait = tick_clock_->NowTicks();
    const base::TimeDelta wait_time = time_after_wait - time_before_wait;
    const bool wait_was_normal =
        wait_time <= (monitor_period_ + kWaitDriftTolerance);

    UMA_HISTOGRAM_TIMES("HangWatcher.SleepDrift.BrowserProcess",
                        wait_time - monitor_period_);

    if (!wait_was_normal) {
      // If the time spent waiting was too high it might indicate the machine
      // is very slow or that it went to sleep. In any case we can't trust the
      // WatchHangsInScopes that are currently live. Update the ignore
      // threshold to make sure they don't trigger a hang on subsequent
      // monitors, then keep waiting.

      base::AutoLock auto_lock(watch_state_lock_);

      // Find the latest deadline among the live watch states. They might
      // change atomically while iterating but that's fine because if they do
      // that means the new WatchHangsInScope was constructed very soon after
      // the abnormal sleep happened and might be affected by the root cause
      // still. Ignoring it is cautious and harmless.
      base::TimeTicks latest_deadline;
      for (const auto& state : watch_states_) {
        base::TimeTicks deadline = state->GetDeadline();
        if (deadline > latest_deadline) {
          latest_deadline = deadline;
        }
      }

      deadline_ignore_threshold_ = latest_deadline;
    }

    // Stop waiting.
    if (wait_was_normal || was_signaled)
      return;
  }
}
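
// Added worked example (not from the original file): with a 10s period and
// 100ms tolerance, waking after 10.05s counts as a normal wait. Waking after,
// say, 30s (e.g. the machine was asleep) is abnormal: every live deadline is
// folded into |deadline_ignore_threshold_| so the stale scopes cannot be
// misreported as hangs on the next Monitor() pass.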

void HangWatcher::Run() {
  // Monitor() should only run on |thread_|. Bind |thread_checker_| here to
  // make sure of that.
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);

  while (g_keep_monitoring.load(std::memory_order_relaxed)) {
    Wait();

    if (!IsWatchListEmpty() &&
        g_keep_monitoring.load(std::memory_order_relaxed)) {
      Monitor();
      if (after_monitor_closure_for_testing_) {
        after_monitor_closure_for_testing_.Run();
      }
    }
  }
}

// static
HangWatcher* HangWatcher::GetInstance() {
  return g_instance;
}

// static
void HangWatcher::RecordHang() {
  base::debug::DumpWithoutCrashing();
  NO_CODE_FOLDING();
}

ScopedClosureRunner HangWatcher::RegisterThreadInternal(
    ThreadType thread_type) {
  AutoLock auto_lock(watch_state_lock_);
  CHECK(base::FeatureList::GetInstance());

  // Do not install a WatchState if the results would never be observable.
  if (!ThreadTypeLoggingLevelGreaterOrEqual(thread_type,
                                            LoggingLevel::kUmaOnly)) {
    return ScopedClosureRunner(base::DoNothing());
  }

  watch_states_.push_back(
      internal::HangWatchState::CreateHangWatchStateForCurrentThread(
          thread_type));
  return ScopedClosureRunner(BindOnce(&HangWatcher::UnregisterThread,
                                      Unretained(HangWatcher::GetInstance())));
}

// static
ScopedClosureRunner HangWatcher::RegisterThread(ThreadType thread_type) {
  if (!GetInstance()) {
    return ScopedClosureRunner();
  }

  return GetInstance()->RegisterThreadInternal(thread_type);
}
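
// Added usage sketch (illustrative, hypothetical thread body): registration
// returns a ScopedClosureRunner whose destruction unregisters the thread.
//
//   ScopedClosureRunner unregister = HangWatcher::RegisterThread(
//       HangWatcher::ThreadType::kThreadPoolThread);
//   // ... the thread runs, creating WatchHangsInScope objects around work
//   // items; |unregister| going out of scope removes the thread from the
//   // watch list.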

base::TimeTicks HangWatcher::WatchStateSnapShot::GetHighestDeadline() const {
  DCHECK(IsActionable());

  // Since entries are sorted in increasing order the last entry is the largest
  // one.
  return hung_watch_state_copies_.back().deadline;
}

HangWatcher::WatchStateSnapShot::WatchStateSnapShot() = default;

void HangWatcher::WatchStateSnapShot::Init(
    const HangWatchStates& watch_states,
    base::TimeTicks deadline_ignore_threshold) {
  DCHECK(!initialized_);

  // No matter if the snapshot is actionable or not after this function
  // it will have been initialized.
  initialized_ = true;

  const base::TimeTicks now = base::TimeTicks::Now();
  bool all_threads_marked = true;
  bool found_deadline_before_ignore_threshold = false;

  // Use an std::array to store the hang counts to avoid allocations. The
  // numerical values of the HangWatcher::ThreadType enum are used to index
  // into the array. A |kInvalidHangCount| is used to signify there were no
  // threads of the type found.
  constexpr size_t kHangCountArraySize =
      static_cast<std::size_t>(base::HangWatcher::ThreadType::kMax) + 1;
  std::array<int, kHangCountArraySize> hung_counts_per_thread_type;

  constexpr int kInvalidHangCount = -1;
  hung_counts_per_thread_type.fill(kInvalidHangCount);

  // Will be true if any of the hung threads has a logging level high enough,
  // as defined through finch params, to warrant dumping a crash.
  bool any_hung_thread_has_dumping_enabled = false;

  // Copy hung thread information.
  for (const auto& watch_state : watch_states) {
    uint64_t flags;
    TimeTicks deadline;
    std::tie(flags, deadline) = watch_state->GetFlagsAndDeadline();

    if (deadline <= deadline_ignore_threshold) {
      found_deadline_before_ignore_threshold = true;
    }

    if (internal::HangWatchDeadline::IsFlagSet(
            internal::HangWatchDeadline::Flag::kIgnoreCurrentWatchHangsInScope,
            flags)) {
      continue;
    }

    // If a thread type is monitored and did not hang it still needs to be
    // logged as a zero count.
    const size_t hang_count_index =
        static_cast<size_t>(watch_state.get()->thread_type());
    if (hung_counts_per_thread_type[hang_count_index] == kInvalidHangCount) {
      hung_counts_per_thread_type[hang_count_index] = 0;
    }

    // Only copy hung threads.
    if (deadline <= now) {
      ++hung_counts_per_thread_type[hang_count_index];

      if (ThreadTypeLoggingLevelGreaterOrEqual(watch_state.get()->thread_type(),
                                               LoggingLevel::kUmaAndCrash)) {
        any_hung_thread_has_dumping_enabled = true;
      }

#if BUILDFLAG(ENABLE_BASE_TRACING)
      // Emit trace events for monitored threads.
      if (ThreadTypeLoggingLevelGreaterOrEqual(watch_state.get()->thread_type(),
                                               LoggingLevel::kUmaOnly)) {
        const PlatformThreadId thread_id = watch_state.get()->GetThreadID();
        const auto track = perfetto::Track::FromPointer(
            this, perfetto::ThreadTrack::ForThread(thread_id));
        TRACE_EVENT_BEGIN("base", "HangWatcher::ThreadHung", track, deadline);
        TRACE_EVENT_END("base", track, now);
        // TODO(crbug.com/1021571): Remove this once fixed.
        PERFETTO_INTERNAL_ADD_EMPTY_EVENT();
      }
#endif

      // Attempt to mark the thread as needing to stay within its current
      // WatchHangsInScope until capture is complete.
      bool thread_marked = watch_state->SetShouldBlockOnHang(flags, deadline);

      // If marking some threads already failed the snapshot won't be kept so
      // there is no need to keep adding to it. The loop doesn't abort though
      // to keep marking the other threads. If these threads remain hung until
      // the next capture then they'll already be marked and will be included
      // in the capture at that time.
      if (thread_marked && all_threads_marked) {
        hung_watch_state_copies_.push_back(
            WatchStateCopy{deadline, watch_state.get()->GetThreadID()});
      } else {
        all_threads_marked = false;
      }
    }
  }

  // Log the hung thread counts to histograms for each thread type if any
  // threads of that type were found.
  for (size_t i = 0; i < kHangCountArraySize; ++i) {
    const int hang_count = hung_counts_per_thread_type[i];
    const HangWatcher::ThreadType thread_type =
        static_cast<HangWatcher::ThreadType>(i);
    if (hang_count != kInvalidHangCount &&
        ThreadTypeLoggingLevelGreaterOrEqual(thread_type,
                                             LoggingLevel::kUmaOnly)) {
      LogHungThreadCountHistogram(thread_type, hang_count);
    }
  }

  // Three cases can invalidate this snapshot and prevent the capture of the
  // hang.
  //
  // 1. Some threads could not be marked for blocking so this snapshot isn't
  // actionable since marked threads could be hung because of unmarked ones.
  // If only the marked threads were captured the information would be
  // incomplete.
  //
  // 2. Any of the threads have a deadline before |deadline_ignore_threshold|.
  // If any thread is ignored it reduces the confidence in the whole state and
  // it's better to avoid capturing misleading data.
  //
  // 3. The hung threads found were all of types that are not configured
  // through Finch to trigger a crash dump.
  //
  if (!all_threads_marked || found_deadline_before_ignore_threshold ||
      !any_hung_thread_has_dumping_enabled) {
    hung_watch_state_copies_.clear();
    return;
  }

  // Sort |hung_watch_state_copies_| by order of decreasing hang severity so
  // the most severe hang is first in the list.
  ranges::sort(hung_watch_state_copies_,
               [](const WatchStateCopy& lhs, const WatchStateCopy& rhs) {
                 return lhs.deadline < rhs.deadline;
               });
}

void HangWatcher::WatchStateSnapShot::Clear() {
  hung_watch_state_copies_.clear();
  initialized_ = false;
}

HangWatcher::WatchStateSnapShot::WatchStateSnapShot(
    const WatchStateSnapShot& other) = default;

HangWatcher::WatchStateSnapShot::~WatchStateSnapShot() = default;

std::string HangWatcher::WatchStateSnapShot::PrepareHungThreadListCrashKey()
    const {
  DCHECK(IsActionable());

  // Build a crash key string that contains the ids of the hung threads.
  constexpr char kSeparator{'|'};
  std::string list_of_hung_thread_ids;

  // Add as many thread ids to the crash key as possible.
  for (const WatchStateCopy& copy : hung_watch_state_copies_) {
    std::string fragment = base::NumberToString(copy.thread_id) + kSeparator;
    if (list_of_hung_thread_ids.size() + fragment.size() <
        static_cast<std::size_t>(debug::CrashKeySize::Size256)) {
      list_of_hung_thread_ids += fragment;
    } else {
      // Respect the by-priority ordering of thread ids in the crash key by
      // stopping the construction as soon as one does not fit. This avoids
      // including lesser priority ids while omitting more important ones.
      break;
    }
  }

  return list_of_hung_thread_ids;
}
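
// Added worked example (illustrative): for hung threads with ids 123 and 456,
// the crash key reads "123|456|". Since the copies are sorted with the most
// severe hang first, truncation at Size256 drops the least severe ids.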

bool HangWatcher::WatchStateSnapShot::IsActionable() const {
  DCHECK(initialized_);
  return !hung_watch_state_copies_.empty();
}

HangWatcher::WatchStateSnapShot HangWatcher::GrabWatchStateSnapshotForTesting()
    const {
  WatchStateSnapShot snapshot;
  snapshot.Init(watch_states_, deadline_ignore_threshold_);
  return snapshot;
}

void HangWatcher::Monitor() {
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);
  AutoLock auto_lock(watch_state_lock_);

  // If all threads unregistered since this function was invoked there's
  // nothing to do anymore.
  if (watch_states_.empty())
    return;

  watch_state_snapshot_.Init(watch_states_, deadline_ignore_threshold_);

  if (watch_state_snapshot_.IsActionable()) {
    DoDumpWithoutCrashing(watch_state_snapshot_);
  }

  watch_state_snapshot_.Clear();
}

void HangWatcher::DoDumpWithoutCrashing(
    const WatchStateSnapShot& watch_state_snapshot) {
  TRACE_EVENT("base", "HangWatcher::DoDumpWithoutCrashing");

  capture_in_progress_.store(true, std::memory_order_relaxed);
  base::AutoLock scope_lock(capture_lock_);

#if !BUILDFLAG(IS_NACL)
  const std::string list_of_hung_thread_ids =
      watch_state_snapshot.PrepareHungThreadListCrashKey();

  static debug::CrashKeyString* crash_key = AllocateCrashKeyString(
      "list-of-hung-threads", debug::CrashKeySize::Size256);

  const debug::ScopedCrashKeyString list_of_hung_threads_crash_key_string(
      crash_key, list_of_hung_thread_ids);

  const debug::ScopedCrashKeyString
      time_since_last_critical_memory_pressure_crash_key_string =
          GetTimeSinceLastCriticalMemoryPressureCrashKey();

  SCOPED_CRASH_KEY_STRING32("HangWatcher", "seconds-since-last-resume",
                            GetTimeSinceLastSystemPowerResumeCrashKeyValue());
#endif

  // To avoid capturing more than one hang that blames a subset of the same
  // threads it's necessary to keep track of what is the furthest deadline
  // that contributed to declaring a hang. Only once
  // all threads have deadlines past this point can we be sure that a newly
  // discovered hang is not directly related.
  // Example:
  // **********************************************************************
  // Timeline A : L------1-------2----------3-------4----------N-----------
  // Timeline B : -------2----------3-------4----------L----5------N-------
  // Timeline C : L----------------------------5------6----7---8------9---N
  // **********************************************************************
  // In the example when a Monitor() happens during timeline A
  // |deadline_ignore_threshold_| (L) is at time zero and deadlines (1-4)
  // are before Now() (N). A hang is captured and L is updated. During
  // the next Monitor() (timeline B) a new deadline is over but we can't
  // capture a hang because deadlines 2-4 are still live and already counted
  // toward a hang. During a third monitor (timeline C) all live deadlines
  // are now after L and a second hang can be recorded.
  base::TimeTicks latest_expired_deadline =
      watch_state_snapshot.GetHighestDeadline();

  if (on_hang_closure_for_testing_)
    on_hang_closure_for_testing_.Run();
  else
    RecordHang();

  // Update after running the actual capture.
  deadline_ignore_threshold_ = latest_expired_deadline;

  capture_in_progress_.store(false, std::memory_order_relaxed);
}

void HangWatcher::SetAfterMonitorClosureForTesting(
    base::RepeatingClosure closure) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  after_monitor_closure_for_testing_ = std::move(closure);
}

void HangWatcher::SetOnHangClosureForTesting(base::RepeatingClosure closure) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  on_hang_closure_for_testing_ = std::move(closure);
}

void HangWatcher::SetMonitoringPeriodForTesting(base::TimeDelta period) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  monitor_period_ = period;
}

void HangWatcher::SetAfterWaitCallbackForTesting(
    RepeatingCallback<void(TimeTicks)> callback) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  after_wait_callback_ = callback;
}

void HangWatcher::SignalMonitorEventForTesting() {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  should_monitor_.Signal();
}

// static
void HangWatcher::StopMonitoringForTesting() {
  g_keep_monitoring.store(false, std::memory_order_relaxed);
}

void HangWatcher::SetTickClockForTesting(const base::TickClock* tick_clock) {
  tick_clock_ = tick_clock;
}

void HangWatcher::BlockIfCaptureInProgress() {
  // Makes a best-effort attempt to block execution if a hang is currently
  // being captured. Only block on |capture_lock| if |capture_in_progress_|
  // hints that it's already held to avoid serializing all threads on this
  // function when no hang capture is in-progress.
  if (capture_in_progress_.load(std::memory_order_relaxed))
    base::AutoLock hang_lock(capture_lock_);
}

void HangWatcher::UnregisterThread() {
  AutoLock auto_lock(watch_state_lock_);

  auto it = ranges::find(
      watch_states_,
      internal::HangWatchState::GetHangWatchStateForCurrentThread(),
      &std::unique_ptr<internal::HangWatchState>::get);

  // Thread should be registered to get unregistered.
  CHECK(it != watch_states_.end(), base::NotFatalUntil::M125);

  watch_states_.erase(it);
}

namespace internal {
namespace {

constexpr uint64_t kOnlyDeadlineMask = 0x00FF'FFFF'FFFF'FFFFu;
constexpr uint64_t kOnlyFlagsMask = ~kOnlyDeadlineMask;
constexpr uint64_t kMaximumFlag = 0x8000'0000'0000'0000u;

// Use as a mask to keep persistent flags and the deadline.
constexpr uint64_t kPersistentFlagsAndDeadlineMask =
    kOnlyDeadlineMask |
    static_cast<uint64_t>(
        HangWatchDeadline::Flag::kIgnoreCurrentWatchHangsInScope);
}  // namespace
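
// Added worked example (not from the original file): the top 8 bits of the
// packed word hold the flags and the lower 56 bits hold the deadline's
// internal tick value. For bits = 0x8000'0000'0000'0123:
//   ExtractFlags(bits)    == 0x8000'0000'0000'0000  (flag byte only)
//   ExtractDeadline(bits) == 0x0000'0000'0000'0123  (tick value only)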

// Flag binary representation assertions.
static_assert(
    static_cast<uint64_t>(HangWatchDeadline::Flag::kMinValue) >
        kOnlyDeadlineMask,
    "Invalid numerical value for flag. Would interfere with bits of data.");
static_assert(static_cast<uint64_t>(HangWatchDeadline::Flag::kMaxValue) <=
                  kMaximumFlag,
              "A flag can only set a single bit.");

HangWatchDeadline::HangWatchDeadline() = default;
HangWatchDeadline::~HangWatchDeadline() = default;

std::pair<uint64_t, TimeTicks> HangWatchDeadline::GetFlagsAndDeadline() const {
  uint64_t bits = bits_.load(std::memory_order_relaxed);
  return std::make_pair(ExtractFlags(bits),
                        DeadlineFromBits(ExtractDeadline((bits))));
}

TimeTicks HangWatchDeadline::GetDeadline() const {
  return DeadlineFromBits(
      ExtractDeadline(bits_.load(std::memory_order_relaxed)));
}

// static
TimeTicks HangWatchDeadline::Max() {
  // |kOnlyDeadlineMask| has all the bits reserved for the TimeTicks value
  // set. This means it also represents the highest representable value.
  return DeadlineFromBits(kOnlyDeadlineMask);
}

// static
bool HangWatchDeadline::IsFlagSet(Flag flag, uint64_t flags) {
  return static_cast<uint64_t>(flag) & flags;
}

void HangWatchDeadline::SetDeadline(TimeTicks new_deadline) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  DCHECK(new_deadline <= Max()) << "Value too high to be represented.";
  DCHECK(new_deadline >= TimeTicks{}) << "Value cannot be negative.";

  if (switch_bits_callback_for_testing_) {
    const uint64_t switched_in_bits = SwitchBitsForTesting();
    // If a concurrent deadline change is tested it cannot have a deadline or
    // persistent flag change since those always happen on the same thread.
    DCHECK((switched_in_bits & kPersistentFlagsAndDeadlineMask) == 0u);
  }

  // Discard all non-persistent flags and apply deadline change.
  const uint64_t old_bits = bits_.load(std::memory_order_relaxed);
  const uint64_t new_flags =
      ExtractFlags(old_bits & kPersistentFlagsAndDeadlineMask);
  bits_.store(new_flags | ExtractDeadline(static_cast<uint64_t>(
                              new_deadline.ToInternalValue())),
              std::memory_order_relaxed);
}

// TODO(crbug.com/1087026): Add flag DCHECKs here.
bool HangWatchDeadline::SetShouldBlockOnHang(uint64_t old_flags,
                                             TimeTicks old_deadline) {
  DCHECK(old_deadline <= Max()) << "Value too high to be represented.";
  DCHECK(old_deadline >= TimeTicks{}) << "Value cannot be negative.";

  // Set the kShouldBlockOnHang flag only if |bits_| did not change since it
  // was read. kShouldBlockOnHang is the only non-persistent flag and should
  // never be set twice. Persistent flags and deadline changes are done from
  // the same thread so there is no risk of losing concurrently added
  // information.
  uint64_t old_bits =
      old_flags | static_cast<uint64_t>(old_deadline.ToInternalValue());
  const uint64_t desired_bits =
      old_bits | static_cast<uint64_t>(Flag::kShouldBlockOnHang);

  // If a test needs to simulate |bits_| changing since calling this function
  // this happens now.
  if (switch_bits_callback_for_testing_) {
    const uint64_t switched_in_bits = SwitchBitsForTesting();

    // Injecting the flag being tested is invalid.
    DCHECK(!IsFlagSet(Flag::kShouldBlockOnHang, switched_in_bits));
  }

  return bits_.compare_exchange_weak(old_bits, desired_bits,
                                     std::memory_order_relaxed,
                                     std::memory_order_relaxed);
}
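
// Added note (illustrative, not from the original file): the compare-exchange
// makes the marking step race-safe. If the watched thread concurrently
// changed its deadline (e.g. a nested WatchHangsInScope was created between
// the snapshot read and this call), |bits_| no longer equals |old_bits|, the
// exchange fails and the caller treats the thread as not marked, discarding
// the snapshot for this monitoring pass.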

void HangWatchDeadline::SetIgnoreCurrentWatchHangsInScope() {
  SetPersistentFlag(Flag::kIgnoreCurrentWatchHangsInScope);
}

void HangWatchDeadline::UnsetIgnoreCurrentWatchHangsInScope() {
  ClearPersistentFlag(Flag::kIgnoreCurrentWatchHangsInScope);
}

void HangWatchDeadline::SetPersistentFlag(Flag flag) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  if (switch_bits_callback_for_testing_)
    SwitchBitsForTesting();
  bits_.fetch_or(static_cast<uint64_t>(flag), std::memory_order_relaxed);
}

void HangWatchDeadline::ClearPersistentFlag(Flag flag) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  if (switch_bits_callback_for_testing_)
    SwitchBitsForTesting();
  bits_.fetch_and(~(static_cast<uint64_t>(flag)), std::memory_order_relaxed);
}

// static
uint64_t HangWatchDeadline::ExtractFlags(uint64_t bits) {
  return bits & kOnlyFlagsMask;
}

// static
uint64_t HangWatchDeadline::ExtractDeadline(uint64_t bits) {
  return bits & kOnlyDeadlineMask;
}

// static
TimeTicks HangWatchDeadline::DeadlineFromBits(uint64_t bits) {
  // |kOnlyDeadlineMask| has all the deadline bits set to 1 so is the largest
  // representable value.
  DCHECK(bits <= kOnlyDeadlineMask)
      << "Flags bits are set. Remove them before returning deadline.";
  static_assert(kOnlyDeadlineMask <= std::numeric_limits<int64_t>::max());
  return TimeTicks::FromInternalValue(static_cast<int64_t>(bits));
}

bool HangWatchDeadline::IsFlagSet(Flag flag) const {
  return bits_.load(std::memory_order_relaxed) & static_cast<uint64_t>(flag);
}

void HangWatchDeadline::SetSwitchBitsClosureForTesting(
    RepeatingCallback<uint64_t(void)> closure) {
  switch_bits_callback_for_testing_ = closure;
}

void HangWatchDeadline::ResetSwitchBitsClosureForTesting() {
  DCHECK(switch_bits_callback_for_testing_);
  switch_bits_callback_for_testing_.Reset();
}

uint64_t HangWatchDeadline::SwitchBitsForTesting() {
  DCHECK(switch_bits_callback_for_testing_);

  const uint64_t old_bits = bits_.load(std::memory_order_relaxed);
  const uint64_t new_bits = switch_bits_callback_for_testing_.Run();
  const uint64_t old_flags = ExtractFlags(old_bits);

  const uint64_t switched_in_bits = old_flags | new_bits;
  bits_.store(switched_in_bits, std::memory_order_relaxed);
  return switched_in_bits;
}

HangWatchState::HangWatchState(HangWatcher::ThreadType thread_type)
    : resetter_(&hang_watch_state, this, nullptr), thread_type_(thread_type) {
// TODO(crbug.com/1223033): Remove this once macOS uses system-wide ids.
// On macOS the thread ids used by CrashPad are not the same as the ones
// provided by PlatformThread. Make sure to use the same for correct
// attribution.
#if BUILDFLAG(IS_MAC)
  uint64_t thread_id;
  pthread_threadid_np(pthread_self(), &thread_id);
  thread_id_ = checked_cast<PlatformThreadId>(thread_id);
#else
  thread_id_ = PlatformThread::CurrentId();
#endif
}

HangWatchState::~HangWatchState() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);

  DCHECK_EQ(GetHangWatchStateForCurrentThread(), this);

#if DCHECK_IS_ON()
  // Destroying the HangWatchState should not be done if there are live
  // WatchHangsInScopes.
  DCHECK(!current_watch_hangs_in_scope_);
#endif
}

// static
std::unique_ptr<HangWatchState>
HangWatchState::CreateHangWatchStateForCurrentThread(
    HangWatcher::ThreadType thread_type) {
  // Allocate a watch state object for this thread.
  std::unique_ptr<HangWatchState> hang_state =
      std::make_unique<HangWatchState>(thread_type);

  // Setting the thread local worked.
  DCHECK_EQ(GetHangWatchStateForCurrentThread(), hang_state.get());

  // Transfer ownership to caller.
  return hang_state;
}

TimeTicks HangWatchState::GetDeadline() const {
  return deadline_.GetDeadline();
}

std::pair<uint64_t, TimeTicks> HangWatchState::GetFlagsAndDeadline() const {
  return deadline_.GetFlagsAndDeadline();
}

void HangWatchState::SetDeadline(TimeTicks deadline) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  deadline_.SetDeadline(deadline);
}

bool HangWatchState::IsOverDeadline() const {
  return TimeTicks::Now() > deadline_.GetDeadline();
}

void HangWatchState::SetIgnoreCurrentWatchHangsInScope() {
  deadline_.SetIgnoreCurrentWatchHangsInScope();
}

void HangWatchState::UnsetIgnoreCurrentWatchHangsInScope() {
  deadline_.UnsetIgnoreCurrentWatchHangsInScope();
}

bool HangWatchState::SetShouldBlockOnHang(uint64_t old_flags,
                                          TimeTicks old_deadline) {
  return deadline_.SetShouldBlockOnHang(old_flags, old_deadline);
}

bool HangWatchState::IsFlagSet(HangWatchDeadline::Flag flag) {
  return deadline_.IsFlagSet(flag);
}

#if DCHECK_IS_ON()
void HangWatchState::SetCurrentWatchHangsInScope(
    WatchHangsInScope* current_hang_watch_scope_enable) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  current_watch_hangs_in_scope_ = current_hang_watch_scope_enable;
}

WatchHangsInScope* HangWatchState::GetCurrentWatchHangsInScope() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  return current_watch_hangs_in_scope_;
}
#endif

HangWatchDeadline* HangWatchState::GetHangWatchDeadlineForTesting() {
  return &deadline_;
}

void HangWatchState::IncrementNestingLevel() {
  ++nesting_level_;
}

void HangWatchState::DecrementNestingLevel() {
  --nesting_level_;
}

// static
HangWatchState* HangWatchState::GetHangWatchStateForCurrentThread() {
  // Workaround false-positive MSAN use-of-uninitialized-value on
  // thread_local storage for loaded libraries:
  // https://github.com/google/sanitizers/issues/1265
  MSAN_UNPOISON(&hang_watch_state, sizeof(internal::HangWatchState*));

  return hang_watch_state;
}

PlatformThreadId HangWatchState::GetThreadID() const {
  return thread_id_;
}

}  // namespace internal

}  // namespace base