// Copyright 2020 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/threading/hang_watcher.h"

#include <atomic>
#include <utility>

#include "base/containers/flat_map.h"
#include "base/debug/alias.h"
#include "base/debug/crash_logging.h"
#include "base/debug/dump_without_crashing.h"
#include "base/debug/leak_annotations.h"
#include "base/feature_list.h"
#include "base/functional/bind.h"
#include "base/functional/callback_helpers.h"
#include "base/metrics/field_trial_params.h"
#include "base/metrics/histogram_macros.h"
#include "base/power_monitor/power_monitor.h"
#include "base/ranges/algorithm.h"
#include "base/strings/string_number_conversions.h"
#include "base/synchronization/lock.h"
#include "base/synchronization/waitable_event.h"
#include "base/threading/platform_thread.h"
#include "base/threading/thread_checker.h"
#include "base/threading/thread_restrictions.h"
#include "base/threading/threading_features.h"
#include "base/time/default_tick_clock.h"
#include "base/time/time.h"
#include "base/trace_event/base_tracing.h"
#include "build/build_config.h"
#include "third_party/abseil-cpp/absl/base/attributes.h"

namespace base {

namespace {

// Defines how much logging happens when the HangWatcher monitors the threads.
// Logging levels are set per thread type through Finch. It's important that
// the order of the enum members stay the same and that their numerical
// values be in increasing order. The implementation of
// ThreadTypeLoggingLevelGreaterOrEqual() depends on it.
enum class LoggingLevel { kNone = 0, kUmaOnly = 1, kUmaAndCrash = 2 };

HangWatcher* g_instance = nullptr;
ABSL_CONST_INIT thread_local internal::HangWatchState* hang_watch_state =
    nullptr;
std::atomic<bool> g_use_hang_watcher{false};
std::atomic<HangWatcher::ProcessType> g_hang_watcher_process_type{
    HangWatcher::ProcessType::kBrowserProcess};

std::atomic<LoggingLevel> g_threadpool_log_level{LoggingLevel::kNone};
std::atomic<LoggingLevel> g_io_thread_log_level{LoggingLevel::kNone};
std::atomic<LoggingLevel> g_main_thread_log_level{LoggingLevel::kNone};

// Indicates whether HangWatcher::Run() should return after the next
// monitoring.
std::atomic<bool> g_keep_monitoring{true};

// Emits the hung thread count histogram. |count| is the number of threads
// of type |thread_type| that were hung or became hung during the last
// monitoring window. This function should be invoked for each thread type
// encountered on each call to Monitor().
void LogHungThreadCountHistogram(HangWatcher::ThreadType thread_type,
                                 int count) {
  // In the case of unique threads like the IO or UI/Main thread a count does
  // not make sense.
  const bool any_thread_hung = count >= 1;

  const HangWatcher::ProcessType process_type =
      g_hang_watcher_process_type.load(std::memory_order_relaxed);
  switch (process_type) {
    case HangWatcher::ProcessType::kUnknownProcess:
      break;

    case HangWatcher::ProcessType::kBrowserProcess:
      switch (thread_type) {
        case HangWatcher::ThreadType::kIOThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.BrowserProcess."
              "IOThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kMainThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.BrowserProcess."
              "UIThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kThreadPoolThread:
          // Not recorded for now.
          break;
      }
      break;

    case HangWatcher::ProcessType::kGPUProcess:
      // Not recorded for now.
      break;

    case HangWatcher::ProcessType::kRendererProcess:
      switch (thread_type) {
        case HangWatcher::ThreadType::kIOThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.RendererProcess."
              "IOThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kMainThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.RendererProcess."
              "MainThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kThreadPoolThread:
          // Not recorded for now.
          break;
      }
      break;

    case HangWatcher::ProcessType::kUtilityProcess:
      switch (thread_type) {
        case HangWatcher::ThreadType::kIOThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.UtilityProcess."
              "IOThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kMainThread:
          UMA_HISTOGRAM_BOOLEAN(
              "HangWatcher.IsThreadHung.UtilityProcess."
              "MainThread",
              any_thread_hung);
          break;
        case HangWatcher::ThreadType::kThreadPoolThread:
          // Not recorded for now.
          break;
      }
      break;
  }
}

// Returns true if |thread_type| was configured through Finch to have a logging
// level that is equal to or exceeds |logging_level|.
bool ThreadTypeLoggingLevelGreaterOrEqual(HangWatcher::ThreadType thread_type,
                                          LoggingLevel logging_level) {
  switch (thread_type) {
    case HangWatcher::ThreadType::kIOThread:
      return g_io_thread_log_level.load(std::memory_order_relaxed) >=
             logging_level;
    case HangWatcher::ThreadType::kMainThread:
      return g_main_thread_log_level.load(std::memory_order_relaxed) >=
             logging_level;
    case HangWatcher::ThreadType::kThreadPoolThread:
      return g_threadpool_log_level.load(std::memory_order_relaxed) >=
             logging_level;
  }
}

}  // namespace

// Determines if the HangWatcher is activated. When false, the HangWatcher
// thread is never started.
BASE_FEATURE(kEnableHangWatcher,
             "EnableHangWatcher",
             FEATURE_ENABLED_BY_DEFAULT);

BASE_FEATURE(kEnableHangWatcherInZygoteChildren,
             "EnableHangWatcherInZygoteChildren",
             FEATURE_ENABLED_BY_DEFAULT);

// Browser process.
constexpr base::FeatureParam<int> kIOThreadLogLevel{
    &kEnableHangWatcher, "io_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kUIThreadLogLevel{
    &kEnableHangWatcher, "ui_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kThreadPoolLogLevel{
    &kEnableHangWatcher, "threadpool_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};

// GPU process.
constexpr base::FeatureParam<int> kGPUProcessIOThreadLogLevel{
    &kEnableHangWatcher, "gpu_process_io_thread_log_level",
    static_cast<int>(LoggingLevel::kNone)};
constexpr base::FeatureParam<int> kGPUProcessMainThreadLogLevel{
    &kEnableHangWatcher, "gpu_process_main_thread_log_level",
    static_cast<int>(LoggingLevel::kNone)};
constexpr base::FeatureParam<int> kGPUProcessThreadPoolLogLevel{
    &kEnableHangWatcher, "gpu_process_threadpool_log_level",
    static_cast<int>(LoggingLevel::kNone)};

// Renderer process.
constexpr base::FeatureParam<int> kRendererProcessIOThreadLogLevel{
    &kEnableHangWatcher, "renderer_process_io_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kRendererProcessMainThreadLogLevel{
    &kEnableHangWatcher, "renderer_process_main_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kRendererProcessThreadPoolLogLevel{
    &kEnableHangWatcher, "renderer_process_threadpool_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};

// Utility process.
constexpr base::FeatureParam<int> kUtilityProcessIOThreadLogLevel{
    &kEnableHangWatcher, "utility_process_io_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kUtilityProcessMainThreadLogLevel{
    &kEnableHangWatcher, "utility_process_main_thread_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
constexpr base::FeatureParam<int> kUtilityProcessThreadPoolLogLevel{
    &kEnableHangWatcher, "utility_process_threadpool_log_level",
    static_cast<int>(LoggingLevel::kUmaOnly)};
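
// Illustrative note: because the logging levels above are plain FeatureParams
// on kEnableHangWatcher, they can typically be overridden locally through the
// standard feature-param command-line syntax rather than a Finch config, e.g.
//   --enable-features="EnableHangWatcher:io_thread_log_level/2"
// where 2 corresponds to LoggingLevel::kUmaAndCrash. This relies on the
// generic base::FeatureList behavior, not on anything specific to this file.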

constexpr const char* kThreadName = "HangWatcher";

// The time that the HangWatcher thread will sleep for between calls to
// Monitor(). Increasing or decreasing this does not modify the type of hangs
// that can be detected. It instead increases the probability that a call to
// Monitor() will happen at the right time to catch a hang. This has to be
// balanced with power/CPU use concerns as busy looping would catch almost all
// hangs but present unacceptable overhead. NOTE: If this period is ever
// changed then all metrics that depend on it like HangWatcher.IsThreadHung
// need to be updated.
constexpr auto kMonitoringPeriod = base::Seconds(10);

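// Rough usage sketch (illustrative only; assumes the calling thread was
// registered through HangWatcher::RegisterThread() and the helper function
// name is made up):
//
//   void OnExpectedlyFastTask() {
//     WatchHangsInScope scope(base::Seconds(10));
//     DoWorkThatShouldCompleteQuickly();
//   }  // Overrunning the deadline before this point may be reported as hang.
//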
WatchHangsInScope::WatchHangsInScope(TimeDelta timeout) {
  internal::HangWatchState* current_hang_watch_state =
      HangWatcher::IsEnabled()
          ? internal::HangWatchState::GetHangWatchStateForCurrentThread()
          : nullptr;

  DCHECK(timeout >= base::TimeDelta()) << "Negative timeouts are invalid.";

  // Thread is not monitored, noop.
  if (!current_hang_watch_state) {
    took_effect_ = false;
    return;
  }

#if DCHECK_IS_ON()
  previous_watch_hangs_in_scope_ =
      current_hang_watch_state->GetCurrentWatchHangsInScope();
  current_hang_watch_state->SetCurrentWatchHangsInScope(this);
#endif

  auto [old_flags, old_deadline] =
      current_hang_watch_state->GetFlagsAndDeadline();

  // TODO(crbug.com/1034046): Check whether we are over deadline already for
  // the previous WatchHangsInScope here by issuing only one TimeTicks::Now()
  // and reusing the value.

  previous_deadline_ = old_deadline;
  TimeTicks deadline = TimeTicks::Now() + timeout;
  current_hang_watch_state->SetDeadline(deadline);
  current_hang_watch_state->IncrementNestingLevel();

  const bool hangs_ignored_for_current_scope =
      internal::HangWatchDeadline::IsFlagSet(
          internal::HangWatchDeadline::Flag::kIgnoreCurrentWatchHangsInScope,
          old_flags);

  // If the current WatchHangsInScope is ignored, temporarily reactivate hang
  // watching for newly created WatchHangsInScopes. On exiting, hang watching
  // is suspended again to return to the original state.
  if (hangs_ignored_for_current_scope) {
    current_hang_watch_state->UnsetIgnoreCurrentWatchHangsInScope();
    set_hangs_ignored_on_exit_ = true;
  }
}

WatchHangsInScope::~WatchHangsInScope() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);

  // If hang watching was not enabled at construction time there is nothing to
  // validate or undo.
  if (!took_effect_) {
    return;
  }

  // If the thread was unregistered since construction there is also nothing to
  // do.
  auto* const state =
      internal::HangWatchState::GetHangWatchStateForCurrentThread();
  if (!state) {
    return;
  }

  // If a hang is currently being captured we should block here so execution
  // stops and we avoid recording unrelated stack frames in the crash.
  if (state->IsFlagSet(internal::HangWatchDeadline::Flag::kShouldBlockOnHang)) {
    base::HangWatcher::GetInstance()->BlockIfCaptureInProgress();
  }

#if DCHECK_IS_ON()
  // Verify that no Scope was destructed out of order.
  DCHECK_EQ(this, state->GetCurrentWatchHangsInScope());
  state->SetCurrentWatchHangsInScope(previous_watch_hangs_in_scope_);
#endif

  if (state->nesting_level() == 1) {
    // If a call to InvalidateActiveExpectations() suspended hang watching
    // during the lifetime of this or any nested WatchHangsInScope it can now
    // safely be reactivated by clearing the ignore bit since this is the
    // outer-most scope.
    state->UnsetIgnoreCurrentWatchHangsInScope();
  } else if (set_hangs_ignored_on_exit_) {
    // Return to ignoring hangs since this was the previous state before hang
    // watching was temporarily enabled for this WatchHangsInScope only in the
    // constructor.
    state->SetIgnoreCurrentWatchHangsInScope();
  }

  // Reset the deadline to the value it had before entering this
  // WatchHangsInScope.
  state->SetDeadline(previous_deadline_);
  // TODO(crbug.com/1034046): Log when a WatchHangsInScope exits after its
  // deadline and that went undetected by the HangWatcher.

  state->DecrementNestingLevel();
}

// static
void HangWatcher::InitializeOnMainThread(ProcessType process_type,
                                         bool is_zygote_child,
                                         bool emit_crashes) {
  DCHECK(!g_use_hang_watcher);
  DCHECK(g_io_thread_log_level == LoggingLevel::kNone);
  DCHECK(g_main_thread_log_level == LoggingLevel::kNone);
  DCHECK(g_threadpool_log_level == LoggingLevel::kNone);

  bool enable_hang_watcher = base::FeatureList::IsEnabled(kEnableHangWatcher);

#if BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_CHROMEOS)
  if (is_zygote_child) {
    enable_hang_watcher =
        enable_hang_watcher &&
        base::FeatureList::IsEnabled(kEnableHangWatcherInZygoteChildren);
  }
#endif

  // Do not start HangWatcher in the GPU process until the issue related to
  // invalid magic signature in the GPU WatchDog is fixed
  // (https://crbug.com/1297760).
  if (process_type == ProcessType::kGPUProcess)
    enable_hang_watcher = false;

  g_use_hang_watcher.store(enable_hang_watcher, std::memory_order_relaxed);

  // Keep the process type.
  g_hang_watcher_process_type.store(process_type, std::memory_order_relaxed);

  // If hang watching is disabled as a whole there is no need to read the
  // params.
  if (!enable_hang_watcher)
    return;

  // Retrieve thread-specific config for hang watching.
  if (process_type == HangWatcher::ProcessType::kBrowserProcess) {
    // Crashes are set to always emit. Override any feature flags.
    if (emit_crashes) {
      g_io_thread_log_level.store(
          static_cast<LoggingLevel>(LoggingLevel::kUmaAndCrash),
          std::memory_order_relaxed);
      g_main_thread_log_level.store(
          static_cast<LoggingLevel>(LoggingLevel::kUmaAndCrash),
          std::memory_order_relaxed);
    } else {
      g_io_thread_log_level.store(
          static_cast<LoggingLevel>(kIOThreadLogLevel.Get()),
          std::memory_order_relaxed);
      g_main_thread_log_level.store(
          static_cast<LoggingLevel>(kUIThreadLogLevel.Get()),
          std::memory_order_relaxed);
    }

    g_threadpool_log_level.store(
        static_cast<LoggingLevel>(kThreadPoolLogLevel.Get()),
        std::memory_order_relaxed);
  } else if (process_type == HangWatcher::ProcessType::kGPUProcess) {
    g_threadpool_log_level.store(
        static_cast<LoggingLevel>(kGPUProcessThreadPoolLogLevel.Get()),
        std::memory_order_relaxed);
    g_io_thread_log_level.store(
        static_cast<LoggingLevel>(kGPUProcessIOThreadLogLevel.Get()),
        std::memory_order_relaxed);
    g_main_thread_log_level.store(
        static_cast<LoggingLevel>(kGPUProcessMainThreadLogLevel.Get()),
        std::memory_order_relaxed);
  } else if (process_type == HangWatcher::ProcessType::kRendererProcess) {
    g_threadpool_log_level.store(
        static_cast<LoggingLevel>(kRendererProcessThreadPoolLogLevel.Get()),
        std::memory_order_relaxed);
    g_io_thread_log_level.store(
        static_cast<LoggingLevel>(kRendererProcessIOThreadLogLevel.Get()),
        std::memory_order_relaxed);
    g_main_thread_log_level.store(
        static_cast<LoggingLevel>(kRendererProcessMainThreadLogLevel.Get()),
        std::memory_order_relaxed);
  } else if (process_type == HangWatcher::ProcessType::kUtilityProcess) {
    g_threadpool_log_level.store(
        static_cast<LoggingLevel>(kUtilityProcessThreadPoolLogLevel.Get()),
        std::memory_order_relaxed);
    g_io_thread_log_level.store(
        static_cast<LoggingLevel>(kUtilityProcessIOThreadLogLevel.Get()),
        std::memory_order_relaxed);
    g_main_thread_log_level.store(
        static_cast<LoggingLevel>(kUtilityProcessMainThreadLogLevel.Get()),
        std::memory_order_relaxed);
  }
}

void HangWatcher::UnitializeOnMainThreadForTesting() {
  g_use_hang_watcher.store(false, std::memory_order_relaxed);
  g_threadpool_log_level.store(LoggingLevel::kNone, std::memory_order_relaxed);
  g_io_thread_log_level.store(LoggingLevel::kNone, std::memory_order_relaxed);
  g_main_thread_log_level.store(LoggingLevel::kNone, std::memory_order_relaxed);
}

// static
bool HangWatcher::IsEnabled() {
  return g_use_hang_watcher.load(std::memory_order_relaxed);
}

// static
bool HangWatcher::IsThreadPoolHangWatchingEnabled() {
  return g_threadpool_log_level.load(std::memory_order_relaxed) !=
         LoggingLevel::kNone;
}

// static
bool HangWatcher::IsIOThreadHangWatchingEnabled() {
  return g_io_thread_log_level.load(std::memory_order_relaxed) !=
         LoggingLevel::kNone;
}

// static
bool HangWatcher::IsCrashReportingEnabled() {
  if (g_main_thread_log_level.load(std::memory_order_relaxed) ==
      LoggingLevel::kUmaAndCrash) {
    return true;
  }
  if (g_io_thread_log_level.load(std::memory_order_relaxed) ==
      LoggingLevel::kUmaAndCrash) {
    return true;
  }
  if (g_threadpool_log_level.load(std::memory_order_relaxed) ==
      LoggingLevel::kUmaAndCrash) {
    return true;
  }
  return false;
}

// static
void HangWatcher::InvalidateActiveExpectations() {
  auto* const state =
      internal::HangWatchState::GetHangWatchStateForCurrentThread();
  if (!state) {
    // If the current thread is not under watch there is nothing to invalidate.
    return;
  }
  state->SetIgnoreCurrentWatchHangsInScope();
}

HangWatcher::HangWatcher()
    : monitor_period_(kMonitoringPeriod),
      should_monitor_(WaitableEvent::ResetPolicy::AUTOMATIC),
      thread_(this, kThreadName),
      tick_clock_(base::DefaultTickClock::GetInstance()),
      memory_pressure_listener_(
          FROM_HERE,
          base::BindRepeating(&HangWatcher::OnMemoryPressure,
                              base::Unretained(this))) {
  // |thread_checker_| should not be bound to the constructing thread.
  DETACH_FROM_THREAD(hang_watcher_thread_checker_);

  should_monitor_.declare_only_used_while_idle();

  DCHECK(!g_instance);
  g_instance = this;
}

// static
void HangWatcher::CreateHangWatcherInstance() {
  DCHECK(!g_instance);
  g_instance = new base::HangWatcher();
  // The hang watcher is leaked to make sure it survives all watched threads.
  ANNOTATE_LEAKING_OBJECT_PTR(g_instance);
}

#if !BUILDFLAG(IS_NACL)
debug::ScopedCrashKeyString
HangWatcher::GetTimeSinceLastCriticalMemoryPressureCrashKey() {
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);

  // The crash key size is large enough to hold the biggest possible return
  // value from base::TimeDelta::InSeconds().
  constexpr debug::CrashKeySize kCrashKeyContentSize =
      debug::CrashKeySize::Size32;
  DCHECK_GE(static_cast<uint64_t>(kCrashKeyContentSize),
            base::NumberToString(std::numeric_limits<int64_t>::max()).size());

  static debug::CrashKeyString* crash_key = AllocateCrashKeyString(
      "seconds-since-last-memory-pressure", kCrashKeyContentSize);

  const base::TimeTicks last_critical_memory_pressure_time =
      last_critical_memory_pressure_.load(std::memory_order_relaxed);
  if (last_critical_memory_pressure_time.is_null()) {
    constexpr char kNoMemoryPressureMsg[] = "No critical memory pressure";
    static_assert(
        std::size(kNoMemoryPressureMsg) <=
            static_cast<uint64_t>(kCrashKeyContentSize),
        "The crash key is too small to hold \"No critical memory pressure\".");
    return debug::ScopedCrashKeyString(crash_key, kNoMemoryPressureMsg);
  } else {
    base::TimeDelta time_since_last_critical_memory_pressure =
        base::TimeTicks::Now() - last_critical_memory_pressure_time;
    return debug::ScopedCrashKeyString(
        crash_key, base::NumberToString(
                       time_since_last_critical_memory_pressure.InSeconds()));
  }
}
#endif

std::string HangWatcher::GetTimeSinceLastSystemPowerResumeCrashKeyValue()
    const {
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);

  const TimeTicks last_system_power_resume_time =
      PowerMonitor::GetLastSystemResumeTime();
  if (last_system_power_resume_time.is_null())
    return "Never suspended";
  if (last_system_power_resume_time == TimeTicks::Max())
    return "Power suspended";

  const TimeDelta time_since_last_system_resume =
      TimeTicks::Now() - last_system_power_resume_time;
  return NumberToString(time_since_last_system_resume.InSeconds());
}

void HangWatcher::OnMemoryPressure(
    base::MemoryPressureListener::MemoryPressureLevel memory_pressure_level) {
  if (memory_pressure_level ==
      base::MemoryPressureListener::MEMORY_PRESSURE_LEVEL_CRITICAL) {
    last_critical_memory_pressure_.store(base::TimeTicks::Now(),
                                         std::memory_order_relaxed);
  }
}

HangWatcher::~HangWatcher() {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  DCHECK_EQ(g_instance, this);
  DCHECK(watch_states_.empty());
  g_instance = nullptr;
  Stop();
}

void HangWatcher::Start() {
  thread_.Start();
  thread_started_ = true;
}

void HangWatcher::Stop() {
  g_keep_monitoring.store(false, std::memory_order_relaxed);
  should_monitor_.Signal();
  thread_.Join();
  thread_started_ = false;

  // In production HangWatcher is always leaked but during testing it's
  // possibly stopped and restarted using a new instance. This makes sure the
  // next call to Start() will actually monitor in that case.
  g_keep_monitoring.store(true, std::memory_order_relaxed);
}

bool HangWatcher::IsWatchListEmpty() {
  AutoLock auto_lock(watch_state_lock_);
  return watch_states_.empty();
}

void HangWatcher::Wait() {
  while (true) {
    // Amount by which the actual time spent sleeping can deviate from
    // the target time and still be considered timely.
    constexpr base::TimeDelta kWaitDriftTolerance = base::Milliseconds(100);

    const base::TimeTicks time_before_wait = tick_clock_->NowTicks();

    // Sleep until next scheduled monitoring or until signaled.
    const bool was_signaled = should_monitor_.TimedWait(monitor_period_);

    if (after_wait_callback_)
      after_wait_callback_.Run(time_before_wait);

    const base::TimeTicks time_after_wait = tick_clock_->NowTicks();
    const base::TimeDelta wait_time = time_after_wait - time_before_wait;
    const bool wait_was_normal =
        wait_time <= (monitor_period_ + kWaitDriftTolerance);

    UMA_HISTOGRAM_TIMES("HangWatcher.SleepDrift.BrowserProcess",
                        wait_time - monitor_period_);

    if (!wait_was_normal) {
      // If the time spent waiting was too high it might indicate the machine
      // is very slow or that it went to sleep. In any case we can't trust the
      // WatchHangsInScopes that are currently live. Update the ignore
      // threshold to make sure they don't trigger a hang on subsequent
      // monitors then keep waiting.

      base::AutoLock auto_lock(watch_state_lock_);

      // Find the latest deadline among the live watch states. They might
      // change atomically while iterating but that's fine because if they do
      // that means the new WatchHangsInScope was constructed very soon after
      // the abnormal sleep happened and might be affected by the root cause
      // still. Ignoring it is cautious and harmless.
      base::TimeTicks latest_deadline;
      for (const auto& state : watch_states_) {
        base::TimeTicks deadline = state->GetDeadline();
        if (deadline > latest_deadline) {
          latest_deadline = deadline;
        }
      }

      deadline_ignore_threshold_ = latest_deadline;
    }

    // Stop waiting.
    if (wait_was_normal || was_signaled)
      return;
  }
}

void HangWatcher::Run() {
  // Monitor() should only run on |thread_|. Bind |thread_checker_| here to
  // make sure of that.
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);

  while (g_keep_monitoring.load(std::memory_order_relaxed)) {
    Wait();

    if (!IsWatchListEmpty() &&
        g_keep_monitoring.load(std::memory_order_relaxed)) {
      Monitor();
      if (after_monitor_closure_for_testing_) {
        after_monitor_closure_for_testing_.Run();
      }
    }
  }
}

// static
HangWatcher* HangWatcher::GetInstance() {
  return g_instance;
}

// static
void HangWatcher::RecordHang() {
  base::debug::DumpWithoutCrashing();
  NO_CODE_FOLDING();
}

ScopedClosureRunner HangWatcher::RegisterThreadInternal(
    ThreadType thread_type) {
  AutoLock auto_lock(watch_state_lock_);
  CHECK(base::FeatureList::GetInstance());

  // Do not install a WatchState if the results would never be observable.
  if (!ThreadTypeLoggingLevelGreaterOrEqual(thread_type,
                                            LoggingLevel::kUmaOnly)) {
    return ScopedClosureRunner(base::DoNothing());
  }

  watch_states_.push_back(
      internal::HangWatchState::CreateHangWatchStateForCurrentThread(
          thread_type));
  return ScopedClosureRunner(BindOnce(&HangWatcher::UnregisterThread,
                                      Unretained(HangWatcher::GetInstance())));
}

// static
ScopedClosureRunner HangWatcher::RegisterThread(ThreadType thread_type) {
  if (!GetInstance()) {
    return ScopedClosureRunner();
  }

  return GetInstance()->RegisterThreadInternal(thread_type);
}
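
// Rough usage sketch (illustrative; the thread type chosen below is made up):
// a thread opts into hang watching by registering itself and keeping the
// returned ScopedClosureRunner alive for as long as it should be watched.
//
//   ScopedClosureRunner unregister_closure =
//       base::HangWatcher::RegisterThread(
//           base::HangWatcher::ThreadType::kThreadPoolThread);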

base::TimeTicks HangWatcher::WatchStateSnapShot::GetHighestDeadline() const {
  DCHECK(IsActionable());

  // Since entries are sorted in increasing order the last entry is the largest
  // one.
  return hung_watch_state_copies_.back().deadline;
}

HangWatcher::WatchStateSnapShot::WatchStateSnapShot() = default;

void HangWatcher::WatchStateSnapShot::Init(
    const HangWatchStates& watch_states,
    base::TimeTicks deadline_ignore_threshold) {
  DCHECK(!initialized_);

  // No matter if the snapshot is actionable or not after this function
  // it will have been initialized.
  initialized_ = true;

  const base::TimeTicks now = base::TimeTicks::Now();
  bool all_threads_marked = true;
  bool found_deadline_before_ignore_threshold = false;

  // Use an std::array to store the hang counts to avoid allocations. The
  // numerical values of the HangWatcher::ThreadType enum are used to index
  // into the array. A |kInvalidHangCount| is used to signify there were no
  // threads of the type found.
  constexpr size_t kHangCountArraySize =
      static_cast<std::size_t>(base::HangWatcher::ThreadType::kMax) + 1;
  std::array<int, kHangCountArraySize> hung_counts_per_thread_type;

  constexpr int kInvalidHangCount = -1;
  hung_counts_per_thread_type.fill(kInvalidHangCount);

  // Will be true if any of the hung threads has a logging level high enough,
  // as defined through finch params, to warrant dumping a crash.
  bool any_hung_thread_has_dumping_enabled = false;

  // Copy hung thread information.
  for (const auto& watch_state : watch_states) {
    uint64_t flags;
    TimeTicks deadline;
    std::tie(flags, deadline) = watch_state->GetFlagsAndDeadline();

    if (deadline <= deadline_ignore_threshold) {
      found_deadline_before_ignore_threshold = true;
    }

    if (internal::HangWatchDeadline::IsFlagSet(
            internal::HangWatchDeadline::Flag::kIgnoreCurrentWatchHangsInScope,
            flags)) {
      continue;
    }

    // If a thread type is monitored and did not hang it still needs to be
    // logged as a zero count.
    const size_t hang_count_index =
        static_cast<size_t>(watch_state.get()->thread_type());
    if (hung_counts_per_thread_type[hang_count_index] == kInvalidHangCount) {
      hung_counts_per_thread_type[hang_count_index] = 0;
    }

    // Only copy hung threads.
    if (deadline <= now) {
      ++hung_counts_per_thread_type[hang_count_index];

      if (ThreadTypeLoggingLevelGreaterOrEqual(
              watch_state.get()->thread_type(), LoggingLevel::kUmaAndCrash)) {
        any_hung_thread_has_dumping_enabled = true;
      }

#if BUILDFLAG(ENABLE_BASE_TRACING)
      // Emit trace events for monitored threads.
      if (ThreadTypeLoggingLevelGreaterOrEqual(
              watch_state.get()->thread_type(), LoggingLevel::kUmaOnly)) {
        const PlatformThreadId thread_id = watch_state.get()->GetThreadID();
        const auto track = perfetto::Track::FromPointer(
            this, perfetto::ThreadTrack::ForThread(thread_id));
        TRACE_EVENT_BEGIN("base", "HangWatcher::ThreadHung", track, deadline);
        TRACE_EVENT_END("base", track, now);
        // TODO(crbug.com/1021571): Remove this once fixed.
        PERFETTO_INTERNAL_ADD_EMPTY_EVENT();
      }
#endif

      // Attempt to mark the thread as needing to stay within its current
      // WatchHangsInScope until capture is complete.
      bool thread_marked = watch_state->SetShouldBlockOnHang(flags, deadline);

      // If marking some threads already failed the snapshot won't be kept so
      // there is no need to keep adding to it. The loop doesn't abort though
      // to keep marking the other threads. If these threads remain hung until
      // the next capture then they'll already be marked and will be included
      // in the capture at that time.
      if (thread_marked && all_threads_marked) {
        hung_watch_state_copies_.push_back(
            WatchStateCopy{deadline, watch_state.get()->GetThreadID()});
      } else {
        all_threads_marked = false;
      }
    }
  }

  // Log the hung thread counts to histograms for each thread type if any
  // thread of the type was found.
  for (size_t i = 0; i < kHangCountArraySize; ++i) {
    const int hang_count = hung_counts_per_thread_type[i];
    const HangWatcher::ThreadType thread_type =
        static_cast<HangWatcher::ThreadType>(i);
    if (hang_count != kInvalidHangCount &&
        ThreadTypeLoggingLevelGreaterOrEqual(thread_type,
                                             LoggingLevel::kUmaOnly)) {
      LogHungThreadCountHistogram(thread_type, hang_count);
    }
  }

  // Three cases can invalidate this snapshot and prevent the capture of the
  // hang.
  //
  // 1. Some threads could not be marked for blocking so this snapshot isn't
  // actionable since marked threads could be hung because of unmarked ones.
  // If only the marked threads were captured the information would be
  // incomplete.
  //
  // 2. Any of the threads have a deadline before |deadline_ignore_threshold|.
  // If any thread is ignored it reduces the confidence in the whole state and
  // it's better to avoid capturing misleading data.
  //
  // 3. The hung threads found were all of types that are not configured
  // through Finch to trigger a crash dump.
  //
  if (!all_threads_marked || found_deadline_before_ignore_threshold ||
      !any_hung_thread_has_dumping_enabled) {
    hung_watch_state_copies_.clear();
    return;
  }

  // Sort |hung_watch_state_copies_| by order of decreasing hang severity so
  // the most severe hang is first in the list.
  ranges::sort(hung_watch_state_copies_,
               [](const WatchStateCopy& lhs, const WatchStateCopy& rhs) {
                 return lhs.deadline < rhs.deadline;
               });
}

void HangWatcher::WatchStateSnapShot::Clear() {
  hung_watch_state_copies_.clear();
  initialized_ = false;
}

HangWatcher::WatchStateSnapShot::WatchStateSnapShot(
    const WatchStateSnapShot& other) = default;

HangWatcher::WatchStateSnapShot::~WatchStateSnapShot() = default;

std::string HangWatcher::WatchStateSnapShot::PrepareHungThreadListCrashKey()
    const {
  DCHECK(IsActionable());

  // Build a crash key string that contains the ids of the hung threads.
  constexpr char kSeparator{'|'};
  std::string list_of_hung_thread_ids;

  // Add as many thread ids to the crash key as possible.
  for (const WatchStateCopy& copy : hung_watch_state_copies_) {
    std::string fragment = base::NumberToString(copy.thread_id) + kSeparator;
    if (list_of_hung_thread_ids.size() + fragment.size() <
        static_cast<std::size_t>(debug::CrashKeySize::Size256)) {
      list_of_hung_thread_ids += fragment;
    } else {
      // Respect the by-priority ordering of thread ids in the crash key by
      // stopping the construction as soon as one does not fit. This avoids
      // including lesser priority ids while omitting more important ones.
      break;
    }
  }

  return list_of_hung_thread_ids;
}

bool HangWatcher::WatchStateSnapShot::IsActionable() const {
  DCHECK(initialized_);
  return !hung_watch_state_copies_.empty();
}

HangWatcher::WatchStateSnapShot HangWatcher::GrabWatchStateSnapshotForTesting()
    const {
  WatchStateSnapShot snapshot;
  snapshot.Init(watch_states_, deadline_ignore_threshold_);
  return snapshot;
}

void HangWatcher::Monitor() {
  DCHECK_CALLED_ON_VALID_THREAD(hang_watcher_thread_checker_);
  AutoLock auto_lock(watch_state_lock_);

  // If all threads unregistered since this function was invoked there's
  // nothing to do anymore.
  if (watch_states_.empty())
    return;

  watch_state_snapshot_.Init(watch_states_, deadline_ignore_threshold_);

  if (watch_state_snapshot_.IsActionable()) {
    DoDumpWithoutCrashing(watch_state_snapshot_);
  }

  watch_state_snapshot_.Clear();
}

void HangWatcher::DoDumpWithoutCrashing(
    const WatchStateSnapShot& watch_state_snapshot) {
  TRACE_EVENT("base", "HangWatcher::DoDumpWithoutCrashing");

  capture_in_progress_.store(true, std::memory_order_relaxed);
  base::AutoLock scope_lock(capture_lock_);

#if !BUILDFLAG(IS_NACL)
  const std::string list_of_hung_thread_ids =
      watch_state_snapshot.PrepareHungThreadListCrashKey();

  static debug::CrashKeyString* crash_key = AllocateCrashKeyString(
      "list-of-hung-threads", debug::CrashKeySize::Size256);

  const debug::ScopedCrashKeyString list_of_hung_threads_crash_key_string(
      crash_key, list_of_hung_thread_ids);

  const debug::ScopedCrashKeyString
      time_since_last_critical_memory_pressure_crash_key_string =
          GetTimeSinceLastCriticalMemoryPressureCrashKey();

  SCOPED_CRASH_KEY_STRING32("HangWatcher", "seconds-since-last-resume",
                            GetTimeSinceLastSystemPowerResumeCrashKeyValue());
#endif

  // To avoid capturing more than one hang that blames a subset of the same
  // threads it's necessary to keep track of what is the furthest deadline
  // that contributed to declaring a hang. Only once all threads have
  // deadlines past this point can we be sure that a newly discovered hang is
  // not directly related.
  // Example:
  // **********************************************************************
  // Timeline A : L------1-------2----------3-------4----------N-----------
  // Timeline B : -------2----------3-------4----------L----5------N-------
  // Timeline C : L----------------------------5------6----7---8------9---N
  // **********************************************************************
  // In the example when a Monitor() happens during timeline A
  // |deadline_ignore_threshold_| (L) is at time zero and deadlines (1-4)
  // are before Now() (N). A hang is captured and L is updated. During
  // the next Monitor() (timeline B) a new deadline is over but we can't
  // capture a hang because deadlines 2-4 are still live and already counted
  // toward a hang. During a third monitor (timeline C) all live deadlines
  // are now after L and a second hang can be recorded.
  base::TimeTicks latest_expired_deadline =
      watch_state_snapshot.GetHighestDeadline();

  if (on_hang_closure_for_testing_)
    on_hang_closure_for_testing_.Run();
  else
    RecordHang();

  // Update after running the actual capture.
  deadline_ignore_threshold_ = latest_expired_deadline;

  capture_in_progress_.store(false, std::memory_order_relaxed);
}

void HangWatcher::SetAfterMonitorClosureForTesting(
    base::RepeatingClosure closure) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  after_monitor_closure_for_testing_ = std::move(closure);
}

void HangWatcher::SetOnHangClosureForTesting(base::RepeatingClosure closure) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  on_hang_closure_for_testing_ = std::move(closure);
}

void HangWatcher::SetMonitoringPeriodForTesting(base::TimeDelta period) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  monitor_period_ = period;
}

void HangWatcher::SetAfterWaitCallbackForTesting(
    RepeatingCallback<void(TimeTicks)> callback) {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  after_wait_callback_ = callback;
}

void HangWatcher::SignalMonitorEventForTesting() {
  DCHECK_CALLED_ON_VALID_THREAD(constructing_thread_checker_);
  should_monitor_.Signal();
}

// static
void HangWatcher::StopMonitoringForTesting() {
  g_keep_monitoring.store(false, std::memory_order_relaxed);
}

void HangWatcher::SetTickClockForTesting(const base::TickClock* tick_clock) {
  tick_clock_ = tick_clock;
}

void HangWatcher::BlockIfCaptureInProgress() {
  // Makes a best-effort attempt to block execution if a hang is currently
  // being captured. Only block on |capture_lock_| if |capture_in_progress_|
  // hints that it's already held to avoid serializing all threads on this
  // function when no hang capture is in-progress.
  if (capture_in_progress_.load(std::memory_order_relaxed))
    base::AutoLock hang_lock(capture_lock_);
}

void HangWatcher::UnregisterThread() {
  AutoLock auto_lock(watch_state_lock_);

  auto it = ranges::find(
      watch_states_,
      internal::HangWatchState::GetHangWatchStateForCurrentThread(),
      &std::unique_ptr<internal::HangWatchState>::get);

  // Thread should be registered to get unregistered.
  CHECK(it != watch_states_.end(), base::NotFatalUntil::M125);

  watch_states_.erase(it);
}

namespace internal {
namespace {

constexpr uint64_t kOnlyDeadlineMask = 0x00FF'FFFF'FFFF'FFFFu;
constexpr uint64_t kOnlyFlagsMask = ~kOnlyDeadlineMask;
constexpr uint64_t kMaximumFlag = 0x8000'0000'0000'0000u;

// Use as a mask to keep persistent flags and the deadline.
constexpr uint64_t kPersistentFlagsAndDeadlineMask =
    kOnlyDeadlineMask |
    static_cast<uint64_t>(
        HangWatchDeadline::Flag::kIgnoreCurrentWatchHangsInScope);
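
// Layout sketch of the uint64_t |bits_| in HangWatchDeadline, as implied by
// the masks above (informational; the mask constants are authoritative):
//
//   bit 63 ......... bit 56 | bit 55 ............................... bit 0
//   [ flags (8 bits)       ] [ deadline (TimeTicks internal value, 56 bits) ]
//
// kOnlyDeadlineMask selects the low 56 bits, kOnlyFlagsMask the high 8 bits,
// and kMaximumFlag is the highest single bit a flag value may occupy.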
}  // namespace

// Flag binary representation assertions.
static_assert(
    static_cast<uint64_t>(HangWatchDeadline::Flag::kMinValue) >
        kOnlyDeadlineMask,
    "Invalid numerical value for flag. Would interfere with bits of data.");
static_assert(static_cast<uint64_t>(HangWatchDeadline::Flag::kMaxValue) <=
                  kMaximumFlag,
              "A flag can only set a single bit.");

HangWatchDeadline::HangWatchDeadline() = default;
HangWatchDeadline::~HangWatchDeadline() = default;

std::pair<uint64_t, TimeTicks> HangWatchDeadline::GetFlagsAndDeadline() const {
  uint64_t bits = bits_.load(std::memory_order_relaxed);
  return std::make_pair(ExtractFlags(bits),
                        DeadlineFromBits(ExtractDeadline((bits))));
}

TimeTicks HangWatchDeadline::GetDeadline() const {
  return DeadlineFromBits(
      ExtractDeadline(bits_.load(std::memory_order_relaxed)));
}

// static
TimeTicks HangWatchDeadline::Max() {
  // |kOnlyDeadlineMask| has all the bits reserved for the TimeTicks value
  // set. This means it also represents the highest representable value.
  return DeadlineFromBits(kOnlyDeadlineMask);
}

// static
bool HangWatchDeadline::IsFlagSet(Flag flag, uint64_t flags) {
  return static_cast<uint64_t>(flag) & flags;
}

void HangWatchDeadline::SetDeadline(TimeTicks new_deadline) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  DCHECK(new_deadline <= Max()) << "Value too high to be represented.";
  DCHECK(new_deadline >= TimeTicks{}) << "Value cannot be negative.";

  if (switch_bits_callback_for_testing_) {
    const uint64_t switched_in_bits = SwitchBitsForTesting();
    // If a concurrent deadline change is tested it cannot have a deadline or
    // persistent flag change since those always happen on the same thread.
    DCHECK((switched_in_bits & kPersistentFlagsAndDeadlineMask) == 0u);
  }

  // Discard all non-persistent flags and apply deadline change.
  const uint64_t old_bits = bits_.load(std::memory_order_relaxed);
  const uint64_t new_flags =
      ExtractFlags(old_bits & kPersistentFlagsAndDeadlineMask);
  bits_.store(new_flags | ExtractDeadline(static_cast<uint64_t>(
                              new_deadline.ToInternalValue())),
              std::memory_order_relaxed);
}

// TODO(crbug.com/1087026): Add flag DCHECKs here.
bool HangWatchDeadline::SetShouldBlockOnHang(uint64_t old_flags,
                                             TimeTicks old_deadline) {
  DCHECK(old_deadline <= Max()) << "Value too high to be represented.";
  DCHECK(old_deadline >= TimeTicks{}) << "Value cannot be negative.";

  // Set the kShouldBlockOnHang flag only if |bits_| did not change since it
  // was read. kShouldBlockOnHang is the only non-persistent flag and should
  // never be set twice. Persistent flags and deadline changes are done from
  // the same thread so there is no risk of losing concurrently added
  // information.
  uint64_t old_bits =
      old_flags | static_cast<uint64_t>(old_deadline.ToInternalValue());
  const uint64_t desired_bits =
      old_bits | static_cast<uint64_t>(Flag::kShouldBlockOnHang);

  // If a test needs to simulate |bits_| changing since calling this function
  // this happens now.
  if (switch_bits_callback_for_testing_) {
    const uint64_t switched_in_bits = SwitchBitsForTesting();

    // Injecting the flag being tested is invalid.
    DCHECK(!IsFlagSet(Flag::kShouldBlockOnHang, switched_in_bits));
  }

  return bits_.compare_exchange_weak(old_bits, desired_bits,
                                     std::memory_order_relaxed,
                                     std::memory_order_relaxed);
}

void HangWatchDeadline::SetIgnoreCurrentWatchHangsInScope() {
  SetPersistentFlag(Flag::kIgnoreCurrentWatchHangsInScope);
}

void HangWatchDeadline::UnsetIgnoreCurrentWatchHangsInScope() {
  ClearPersistentFlag(Flag::kIgnoreCurrentWatchHangsInScope);
}

void HangWatchDeadline::SetPersistentFlag(Flag flag) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  if (switch_bits_callback_for_testing_)
    SwitchBitsForTesting();
  bits_.fetch_or(static_cast<uint64_t>(flag), std::memory_order_relaxed);
}

void HangWatchDeadline::ClearPersistentFlag(Flag flag) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  if (switch_bits_callback_for_testing_)
    SwitchBitsForTesting();
  bits_.fetch_and(~(static_cast<uint64_t>(flag)), std::memory_order_relaxed);
}

// static
uint64_t HangWatchDeadline::ExtractFlags(uint64_t bits) {
  return bits & kOnlyFlagsMask;
}

// static
uint64_t HangWatchDeadline::ExtractDeadline(uint64_t bits) {
  return bits & kOnlyDeadlineMask;
}

// static
TimeTicks HangWatchDeadline::DeadlineFromBits(uint64_t bits) {
  // |kOnlyDeadlineMask| has all the deadline bits set to 1 so is the largest
  // representable value.
  DCHECK(bits <= kOnlyDeadlineMask)
      << "Flags bits are set. Remove them before returning deadline.";
  static_assert(kOnlyDeadlineMask <= std::numeric_limits<int64_t>::max());
  return TimeTicks::FromInternalValue(static_cast<int64_t>(bits));
}

bool HangWatchDeadline::IsFlagSet(Flag flag) const {
  return bits_.load(std::memory_order_relaxed) & static_cast<uint64_t>(flag);
}

void HangWatchDeadline::SetSwitchBitsClosureForTesting(
    RepeatingCallback<uint64_t(void)> closure) {
  switch_bits_callback_for_testing_ = closure;
}

void HangWatchDeadline::ResetSwitchBitsClosureForTesting() {
  DCHECK(switch_bits_callback_for_testing_);
  switch_bits_callback_for_testing_.Reset();
}

uint64_t HangWatchDeadline::SwitchBitsForTesting() {
  DCHECK(switch_bits_callback_for_testing_);

  const uint64_t old_bits = bits_.load(std::memory_order_relaxed);
  const uint64_t new_bits = switch_bits_callback_for_testing_.Run();
  const uint64_t old_flags = ExtractFlags(old_bits);

  const uint64_t switched_in_bits = old_flags | new_bits;
  bits_.store(switched_in_bits, std::memory_order_relaxed);
  return switched_in_bits;
}

HangWatchState::HangWatchState(HangWatcher::ThreadType thread_type)
    : resetter_(&hang_watch_state, this, nullptr), thread_type_(thread_type) {
  // TODO(crbug.com/1223033): Remove this once macOS uses system-wide ids.
  // On macOS the thread ids used by CrashPad are not the same as the ones
  // provided by PlatformThread. Make sure to use the same for correct
  // attribution.
#if BUILDFLAG(IS_MAC)
  uint64_t thread_id;
  pthread_threadid_np(pthread_self(), &thread_id);
  thread_id_ = checked_cast<PlatformThreadId>(thread_id);
#else
  thread_id_ = PlatformThread::CurrentId();
#endif
}

HangWatchState::~HangWatchState() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);

  DCHECK_EQ(GetHangWatchStateForCurrentThread(), this);

#if DCHECK_IS_ON()
  // Destroying the HangWatchState should not be done if there are live
  // WatchHangsInScopes.
  DCHECK(!current_watch_hangs_in_scope_);
#endif
}

// static
std::unique_ptr<HangWatchState>
HangWatchState::CreateHangWatchStateForCurrentThread(
    HangWatcher::ThreadType thread_type) {
  // Allocate a watch state object for this thread.
  std::unique_ptr<HangWatchState> hang_state =
      std::make_unique<HangWatchState>(thread_type);

  // Setting the thread local worked.
  DCHECK_EQ(GetHangWatchStateForCurrentThread(), hang_state.get());

  // Transfer ownership to caller.
  return hang_state;
}

TimeTicks HangWatchState::GetDeadline() const {
  return deadline_.GetDeadline();
}

std::pair<uint64_t, TimeTicks> HangWatchState::GetFlagsAndDeadline() const {
  return deadline_.GetFlagsAndDeadline();
}

void HangWatchState::SetDeadline(TimeTicks deadline) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  deadline_.SetDeadline(deadline);
}

bool HangWatchState::IsOverDeadline() const {
  return TimeTicks::Now() > deadline_.GetDeadline();
}

void HangWatchState::SetIgnoreCurrentWatchHangsInScope() {
  deadline_.SetIgnoreCurrentWatchHangsInScope();
}

void HangWatchState::UnsetIgnoreCurrentWatchHangsInScope() {
  deadline_.UnsetIgnoreCurrentWatchHangsInScope();
}

bool HangWatchState::SetShouldBlockOnHang(uint64_t old_flags,
                                          TimeTicks old_deadline) {
  return deadline_.SetShouldBlockOnHang(old_flags, old_deadline);
}

bool HangWatchState::IsFlagSet(HangWatchDeadline::Flag flag) {
  return deadline_.IsFlagSet(flag);
}

#if DCHECK_IS_ON()
void HangWatchState::SetCurrentWatchHangsInScope(
    WatchHangsInScope* current_hang_watch_scope_enable) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  current_watch_hangs_in_scope_ = current_hang_watch_scope_enable;
}

WatchHangsInScope* HangWatchState::GetCurrentWatchHangsInScope() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  return current_watch_hangs_in_scope_;
}
#endif

HangWatchDeadline* HangWatchState::GetHangWatchDeadlineForTesting() {
  return &deadline_;
}

void HangWatchState::IncrementNestingLevel() {
  ++nesting_level_;
}

void HangWatchState::DecrementNestingLevel() {
  --nesting_level_;
}

// static
HangWatchState* HangWatchState::GetHangWatchStateForCurrentThread() {
  // Workaround false-positive MSAN use-of-uninitialized-value on
  // thread_local storage for loaded libraries:
  // https://github.com/google/sanitizers/issues/1265
  MSAN_UNPOISON(&hang_watch_state, sizeof(internal::HangWatchState*));

  return hang_watch_state;
}

PlatformThreadId HangWatchState::GetThreadID() const {
  return thread_id_;
}

}  // namespace internal

}  // namespace base