// Copyright 2018 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/sampling_heap_profiler/sampling_heap_profiler.h"

#include <algorithm>
#include <cmath>
#include <utility>

#include "base/allocator/dispatcher/tls.h"
#include "base/compiler_specific.h"
#include "base/debug/stack_trace.h"
#include "base/feature_list.h"
#include "base/functional/bind.h"
#include "base/logging.h"
#include "base/metrics/histogram_functions.h"
#include "base/no_destructor.h"
#include "base/notreached.h"
#include "base/sampling_heap_profiler/lock_free_address_hash_set.h"
#include "base/sampling_heap_profiler/poisson_allocation_sampler.h"
#include "base/threading/thread_local_storage.h"
#include "base/trace_event/heap_profiler_allocation_context_tracker.h"  // no-presubmit-check
#include "build/build_config.h"
#include "partition_alloc/partition_alloc.h"
#include "partition_alloc/shim/allocator_shim.h"

#if BUILDFLAG(IS_APPLE)
#include <pthread.h>
#endif

#if BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_CHROMEOS) || BUILDFLAG(IS_ANDROID)
#include <sys/prctl.h>
#endif

namespace base {

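// Maximum number of stack frames recorded per sample. CaptureNativeStack()
// reserves one of these entries for the thread name.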
constexpr uint32_t kMaxStackEntries = 256;

namespace {

struct ThreadLocalData {
  const char* thread_name = nullptr;
};

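// Returns the calling thread's ThreadLocalData. With USE_LOCAL_TLS_EMULATION()
// the dispatcher's own TLS implementation backs the storage, so it can be
// accessed from within allocation hooks; otherwise a plain `thread_local`
// suffices.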
ThreadLocalData* GetThreadLocalData() {
#if USE_LOCAL_TLS_EMULATION()
  static base::NoDestructor<
      base::allocator::dispatcher::ThreadLocalStorage<ThreadLocalData>>
      thread_local_data("sampling_heap_profiler");
  return thread_local_data->GetThreadLocalData();
#else
  static thread_local ThreadLocalData thread_local_data;
  return &thread_local_data;
#endif
}

#if BUILDFLAG(CAN_UNWIND_WITH_FRAME_POINTERS)
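// If enabled, skip frame-pointer unwinding even when it is available and fall
// back to the default unwinder; see ChooseStackUnwinder().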
BASE_FEATURE(kAvoidFramePointers,
             "AndroidHeapSamplerAvoidFramePointers",
             base::FEATURE_DISABLED_BY_DEFAULT);
#endif

using StackUnwinder = SamplingHeapProfiler::StackUnwinder;
using base::allocator::dispatcher::AllocationSubsystem;

// If a thread name has been set from ThreadIdNameManager, use that. Otherwise,
// get the thread name from the kernel if available, or return a string
// containing the thread id. This function intentionally leaks the allocated
// strings since they are used to tag allocations even after the thread dies.
const char* GetAndLeakThreadName() {
  const char* thread_name =
      base::ThreadIdNameManager::GetInstance()->GetNameForCurrentThread();
  if (thread_name && *thread_name != '\0')
    return thread_name;

  // prctl requires 16 bytes, snprintf requires 19, pthread_getname_np requires
  // 64 on macOS, see PlatformThread::SetName in platform_thread_apple.mm.
  constexpr size_t kBufferLen = 64;
  char name[kBufferLen];
#if BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_CHROMEOS) || BUILDFLAG(IS_ANDROID)
  // If the thread name is not set, try to get it from prctl. Thread name might
  // not be set in cases where the thread started before heap profiling was
  // enabled.
  int err = prctl(PR_GET_NAME, name);
  if (!err)
    return strdup(name);
#elif BUILDFLAG(IS_APPLE)
  int err = pthread_getname_np(pthread_self(), name, kBufferLen);
  if (err == 0 && *name != '\0')
    return strdup(name);
#endif  // BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_CHROMEOS) ||
        // BUILDFLAG(IS_ANDROID)

  // Use tid if we don't have a thread name.
  snprintf(name, sizeof(name), "Thread %lu",
           static_cast<unsigned long>(base::PlatformThread::CurrentId()));
  return strdup(name);
}

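// Caches |name| in TLS for the current thread. If |name| is null, returns the
// previously cached name, querying (and leaking) one from the OS on first use.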
const char* UpdateAndGetThreadName(const char* name) {
  ThreadLocalData* const thread_local_data = GetThreadLocalData();
  if (name)
    thread_local_data->thread_name = name;
  if (!thread_local_data->thread_name) {
    thread_local_data->thread_name = GetAndLeakThreadName();
  }
  return thread_local_data->thread_name;
}

// Checks whether unwinding from this function works.
[[maybe_unused]] StackUnwinder CheckForDefaultUnwindTables() {
  const void* stack[kMaxStackEntries];
  size_t frame_count = base::debug::CollectStackTrace(stack, kMaxStackEntries);
  // First frame is the current function and can be found without unwind
  // tables.
  return frame_count > 1 ? StackUnwinder::kDefault
                         : StackUnwinder::kUnavailable;
}

StackUnwinder ChooseStackUnwinder() {
#if BUILDFLAG(CAN_UNWIND_WITH_FRAME_POINTERS)
  // Use frame pointers if available, since they can be faster than the
  // default.
  if (!base::FeatureList::IsEnabled(kAvoidFramePointers)) {
    return StackUnwinder::kFramePointers;
  }
#endif
#if BUILDFLAG(IS_ANDROID)
  // Default unwind tables aren't always present on Android.
  return CheckForDefaultUnwindTables();
#else
  return StackUnwinder::kDefault;
#endif
}

}  // namespace

SamplingHeapProfiler::Sample::Sample(size_t size,
                                     size_t total,
                                     uint32_t ordinal)
    : size(size), total(total), ordinal(ordinal) {}

SamplingHeapProfiler::Sample::Sample(const Sample&) = default;
SamplingHeapProfiler::Sample::~Sample() = default;

SamplingHeapProfiler::SamplingHeapProfiler() = default;
SamplingHeapProfiler::~SamplingHeapProfiler() {
  if (record_thread_names_)
    base::ThreadIdNameManager::GetInstance()->RemoveObserver(this);
}

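// A minimal usage sketch (nested Start()/Stop() sessions are supported via
// |running_sessions_|):
//
//   auto* profiler = SamplingHeapProfiler::Get();
//   profiler->SetSamplingInterval(128 * 1024);  // Mean sampling rate, bytes.
//   uint32_t profile_id = profiler->Start();
//   // ... workload ...
//   std::vector<SamplingHeapProfiler::Sample> samples =
//       profiler->GetSamples(profile_id);
//   profiler->Stop();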
uint32_t SamplingHeapProfiler::Start() {
  const auto unwinder = ChooseStackUnwinder();
#if BUILDFLAG(IS_ANDROID)
  // Record which unwinder is in use on Android, since it's hard to keep track
  // of which methods are available at runtime.
  base::UmaHistogramEnumeration("HeapProfiling.AndroidStackUnwinder", unwinder);
#endif
  if (unwinder == StackUnwinder::kUnavailable) {
    LOG(WARNING) << "Sampling heap profiler: Stack unwinding is not available.";
    return 0;
  }
  unwinder_.store(unwinder);

  auto* poisson_allocation_sampler = PoissonAllocationSampler::Get();

  // Sampling interval is in bytes. Record it in KB since the extra precision
  // isn't needed for metrics and HeapProfilerController can set the interval
  // to center around 10M bytes, which would overflow the buckets.
  base::UmaHistogramCounts10M(
      "HeapProfiling.SamplingIntervalKB",
      static_cast<int>(poisson_allocation_sampler->SamplingInterval() / 1024));

  AutoLock lock(start_stop_mutex_);
  if (!running_sessions_++)
    poisson_allocation_sampler->AddSamplesObserver(this);
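  // Return the ordinal of the most recent sample so the caller can pass it to
  // GetSamples() to retrieve only samples recorded after this point.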
  return last_sample_ordinal_;
}

void SamplingHeapProfiler::Stop() {
  AutoLock lock(start_stop_mutex_);
  DCHECK_GT(running_sessions_, 0);
  if (!--running_sessions_)
    PoissonAllocationSampler::Get()->RemoveSamplesObserver(this);
}

void SamplingHeapProfiler::SetSamplingInterval(size_t sampling_interval_bytes) {
  PoissonAllocationSampler::Get()->SetSamplingInterval(sampling_interval_bytes);
}

void SamplingHeapProfiler::SetRecordThreadNames(bool value) {
  if (record_thread_names_ == value)
    return;
  record_thread_names_ = value;
  if (value) {
    base::ThreadIdNameManager::GetInstance()->AddObserver(this);
  } else {
    base::ThreadIdNameManager::GetInstance()->RemoveObserver(this);
  }
}

// static
const char* SamplingHeapProfiler::CachedThreadName() {
  return UpdateAndGetThreadName(nullptr);
}

const void** SamplingHeapProfiler::CaptureStackTrace(const void** frames,
                                                     size_t max_entries,
                                                     size_t* count) {
  // Skip top frames as they correspond to the profiler itself.
  size_t skip_frames = 3;
  size_t frame_count = 0;
  switch (unwinder_) {
#if BUILDFLAG(CAN_UNWIND_WITH_FRAME_POINTERS)
    case StackUnwinder::kFramePointers:
      frame_count = base::debug::TraceStackFramePointers(
          const_cast<const void**>(frames), max_entries, skip_frames);
      skip_frames = 0;
      break;
#endif
    case StackUnwinder::kDefault:
      // Fall back to capturing the stack with base::debug::CollectStackTrace,
      // which is likely slower, but more reliable.
      frame_count = base::debug::CollectStackTrace(frames, max_entries);
      break;
    default:
      // Profiler should not be started if ChooseStackUnwinder() returns
      // anything else.
      NOTREACHED();
      break;
  }

  skip_frames = std::min(skip_frames, frame_count);
  *count = frame_count - skip_frames;
  return frames + skip_frames;
}

void SamplingHeapProfiler::SampleAdded(void* address,
                                       size_t size,
                                       size_t total,
                                       AllocationSubsystem type,
                                       const char* context) {
  // CaptureStack and allocation context tracking may use TLS.
  // Bail out if it has been destroyed.
  if (UNLIKELY(base::ThreadLocalStorage::HasBeenDestroyed()))
    return;
  DCHECK(PoissonAllocationSampler::ScopedMuteThreadSamples::IsMuted());
  Sample sample(size, total, ++last_sample_ordinal_);
  sample.allocator = type;
  CaptureNativeStack(context, &sample);
  AutoLock lock(mutex_);
  if (UNLIKELY(PoissonAllocationSampler::AreHookedSamplesMuted() &&
               type != AllocationSubsystem::kManualForTesting)) {
    // Throw away any non-test samples that were being collected before
    // ScopedMuteHookedSamplesForTesting was enabled. This is done inside the
    // lock to catch any samples that were being collected while
    // ClearSamplesForTesting is running.
    return;
  }
  RecordString(sample.context);

  // If a sample is already present with the same address, then that means that
  // the sampling heap profiler failed to observe the destruction -- possibly
  // because the sampling heap profiler was temporarily disabled. We should
  // override the old entry.
  samples_.insert_or_assign(address, std::move(sample));
}

void SamplingHeapProfiler::CaptureNativeStack(const char* context,
                                              Sample* sample) {
  const void* stack[kMaxStackEntries];
  size_t frame_count;
  // One frame is reserved for the thread name.
  const void** first_frame =
      CaptureStackTrace(stack, kMaxStackEntries - 1, &frame_count);
  DCHECK_LT(frame_count, kMaxStackEntries);
  sample->stack.assign(first_frame, first_frame + frame_count);

  if (record_thread_names_)
    sample->thread_name = CachedThreadName();

  if (!context) {
    const auto* tracker =
        trace_event::AllocationContextTracker::GetInstanceForCurrentThread();
    if (tracker)
      context = tracker->TaskContext();
  }
  sample->context = context;
}

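// Adds |string| to |strings_| so GetStrings() can report it; returns the
// stored pointer, or nullptr if |string| is null.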
const char* SamplingHeapProfiler::RecordString(const char* string) {
  return string ? *strings_.insert(string).first : nullptr;
}

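// PoissonAllocationSampler::SamplesObserver implementation; called when a
// previously sampled allocation is freed.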
void SamplingHeapProfiler::SampleRemoved(void* address) {
  DCHECK(base::PoissonAllocationSampler::ScopedMuteThreadSamples::IsMuted());
  base::AutoLock lock(mutex_);
  samples_.erase(address);
}

std::vector<SamplingHeapProfiler::Sample> SamplingHeapProfiler::GetSamples(
    uint32_t profile_id) {
  // Make sure the sampler does not invoke |SampleAdded| or |SampleRemoved|
  // on this thread. Otherwise it could end up in a deadlock.
  // See crbug.com/882495
  PoissonAllocationSampler::ScopedMuteThreadSamples no_samples_scope;
  AutoLock lock(mutex_);
  std::vector<Sample> samples;
  samples.reserve(samples_.size());
  for (auto& it : samples_) {
    Sample& sample = it.second;
    if (sample.ordinal > profile_id)
      samples.push_back(sample);
  }
  return samples;
}

std::vector<const char*> SamplingHeapProfiler::GetStrings() {
  PoissonAllocationSampler::ScopedMuteThreadSamples no_samples_scope;
  AutoLock lock(mutex_);
  return std::vector<const char*>(strings_.begin(), strings_.end());
}

// static
void SamplingHeapProfiler::Init() {
  GetThreadLocalData();
  PoissonAllocationSampler::Init();
}

// static
SamplingHeapProfiler* SamplingHeapProfiler::Get() {
  static NoDestructor<SamplingHeapProfiler> instance;
  return instance.get();
}

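// ThreadIdNameManager::Observer implementation; keeps the TLS-cached thread
// name in sync when a thread is renamed.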
void SamplingHeapProfiler::OnThreadNameChanged(const char* name) {
  UpdateAndGetThreadName(name);
}

void SamplingHeapProfiler::ClearSamplesForTesting() {
  DCHECK(PoissonAllocationSampler::AreHookedSamplesMuted());
  base::AutoLock lock(mutex_);
  samples_.clear();
  // Since hooked samples are muted, any samples that are waiting to take the
  // lock in SampleAdded will be discarded. Tests can now call
  // PoissonAllocationSampler::RecordAlloc with allocator type
  // kManualForTesting to add samples cleanly.
}

}  // namespace base