/*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "perfetto_hprof"

#include "perfetto_hprof.h"

#include <fcntl.h>
#include <fnmatch.h>
#include <inttypes.h>
#include <sched.h>
#include <signal.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/un.h>
#include <sys/wait.h>
#include <thread>
#include <time.h>

#include <limits>
#include <optional>
#include <type_traits>

#include "android-base/file.h"
#include "android-base/logging.h"
#include "android-base/properties.h"
#include "base/fast_exit.h"
#include "base/systrace.h"
#include "gc/heap-visit-objects-inl.h"
#include "gc/heap.h"
#include "gc/scoped_gc_critical_section.h"
#include "mirror/object-refvisitor-inl.h"
#include "nativehelper/scoped_local_ref.h"
#include "perfetto/profiling/parse_smaps.h"
#include "perfetto/trace/interned_data/interned_data.pbzero.h"
#include "perfetto/trace/profiling/heap_graph.pbzero.h"
#include "perfetto/trace/profiling/profile_common.pbzero.h"
#include "perfetto/trace/profiling/smaps.pbzero.h"
#include "perfetto/config/profiling/java_hprof_config.pbzero.h"
#include "perfetto/protozero/packed_repeated_fields.h"
#include "perfetto/tracing.h"
#include "runtime-inl.h"
#include "runtime_callbacks.h"
#include "scoped_thread_state_change-inl.h"
#include "thread_list.h"
#include "well_known_classes.h"
#include "dex/descriptors_names.h"

// There are three threads involved in this:
// * listener thread: this is idle in the background when this plugin gets loaded, and waits
//   for data on g_signal_pipe_fds.
// * signal thread: an arbitrary thread that handles the signal and writes data to
//   g_signal_pipe_fds.
// * perfetto producer thread: once the signal is received, the app forks. In the newly forked
//   child, the Perfetto Client API spawns a thread to communicate with traced.

namespace perfetto_hprof {

constexpr int kJavaHeapprofdSignal = __SIGRTMIN + 6;
constexpr time_t kWatchdogTimeoutSec = 120;
// This needs to be lower than the maximum acceptable chunk size, because this
// is checked *before* writing another submessage. We conservatively assume
// submessages can be up to 100k here for a 500k chunk size.
// DropBox has a 500k chunk limit, and each chunk needs to parse as a proto.
constexpr uint32_t kPacketSizeThreshold = 400000;
constexpr char kByte[1] = {'x'};
static art::Mutex& GetStateMutex() {
  static art::Mutex state_mutex("perfetto_hprof_state_mutex", art::LockLevel::kGenericBottomLock);
  return state_mutex;
}

static art::ConditionVariable& GetStateCV() {
  static art::ConditionVariable state_cv("perfetto_hprof_state_cv", GetStateMutex());
  return state_cv;
}

static int requested_tracing_session_id = 0;
static State g_state = State::kUninitialized;
static bool g_oome_triggered = false;
static uint32_t g_oome_sessions_pending = 0;

// Pipe to signal from the signal handler into a worker thread that handles the
// dump requests.
int g_signal_pipe_fds[2];
static struct sigaction g_orig_act = {};

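// Interns `s` in `*m`: returns its existing id if present, otherwise assigns it
// the next sequential id (the current map size) and returns that.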
template <typename T>
uint64_t FindOrAppend(std::map<T, uint64_t>* m, const T& s) {
  auto it = m->find(s);
  if (it == m->end()) {
    std::tie(it, std::ignore) = m->emplace(s, m->size());
  }
  return it->second;
}

void ArmWatchdogOrDie() {
  timer_t timerid{};
  struct sigevent sev {};
  sev.sigev_notify = SIGEV_SIGNAL;
  sev.sigev_signo = SIGKILL;

  if (timer_create(CLOCK_MONOTONIC, &sev, &timerid) == -1) {
    // This only gets called in the child, so we can fatal without impacting
    // the app.
    PLOG(FATAL) << "failed to create watchdog timer";
  }

  struct itimerspec its {};
  its.it_value.tv_sec = kWatchdogTimeoutSec;

  if (timer_settime(timerid, 0, &its, nullptr) == -1) {
    // This only gets called in the child, so we can fatal without impacting
    // the app.
    PLOG(FATAL) << "failed to arm watchdog timer";
  }
}

// Sample entries that match one of the following:
// start with /system/
// start with /vendor/
// start with /data/app/
// contains "extracted in memory from Y", where Y matches any of the above
bool ShouldSampleSmapsEntry(const perfetto::profiling::SmapsEntry& e) {
  if (e.pathname.starts_with("/system/") ||
      e.pathname.starts_with("/vendor/") ||
      e.pathname.starts_with("/data/app/")) {
    return true;
  }
  if (e.pathname.starts_with("[anon:")) {
    if (e.pathname.find("extracted in memory from /system/") != std::string::npos) {
      return true;
    }
    if (e.pathname.find("extracted in memory from /vendor/") != std::string::npos) {
      return true;
    }
    if (e.pathname.find("extracted in memory from /data/app/") != std::string::npos) {
      return true;
    }
  }
  return false;
}

uint64_t GetCurrentBootClockNs() {
  struct timespec ts = {};
  if (clock_gettime(CLOCK_BOOTTIME, &ts) != 0) {
    LOG(FATAL) << "Failed to get boottime.";
  }
  return ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

bool IsDebugBuild() {
  std::string build_type = android::base::GetProperty("ro.build.type", "");
  return !build_type.empty() && build_type != "user";
}

// Verifies the manifest restrictions are respected.
// For regular heap dumps this is already handled by heapprofd.
bool IsOomeHeapDumpAllowed(const perfetto::DataSourceConfig& ds_config) {
  if (art::Runtime::Current()->IsJavaDebuggable() || IsDebugBuild()) {
    return true;
  }

  if (ds_config.session_initiator() ==
      perfetto::DataSourceConfig::SESSION_INITIATOR_TRUSTED_SYSTEM) {
    return art::Runtime::Current()->IsProfileable() || art::Runtime::Current()->IsSystemServer();
  } else {
    return art::Runtime::Current()->IsProfileableFromShell();
  }
}

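// Perfetto data source for Java heap dumps. It is registered either for
// on-demand dumps ("android.java_hprof") or for out-of-memory dumps
// ("android.java_hprof.oom"); see SetupDataSource below.
//
// Illustrative trace config snippet targeting this data source (field names
// come from JavaHprofConfig; the values are examples only):
//   data_sources {
//     config {
//       name: "android.java_hprof"
//       java_hprof_config {
//         dump_smaps: true
//         ignored_types: "java.lang.ref.SoftReference"
//       }
//     }
//   }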
class JavaHprofDataSource : public perfetto::DataSource<JavaHprofDataSource> {
 public:
  constexpr static perfetto::BufferExhaustedPolicy kBufferExhaustedPolicy =
    perfetto::BufferExhaustedPolicy::kStall;

  explicit JavaHprofDataSource(bool is_oome_heap) : is_oome_heap_(is_oome_heap) {}

  void OnSetup(const SetupArgs& args) override {
    if (!is_oome_heap_) {
      uint64_t normalized_tracing_session_id =
        args.config->tracing_session_id() % std::numeric_limits<int32_t>::max();
      if (requested_tracing_session_id < 0) {
        LOG(ERROR) << "invalid requested tracing session id " << requested_tracing_session_id;
        return;
      }
      if (static_cast<uint64_t>(requested_tracing_session_id) != normalized_tracing_session_id) {
        return;
      }
    }

    // This is on the heap as it triggers -Wframe-larger-than.
    std::unique_ptr<perfetto::protos::pbzero::JavaHprofConfig::Decoder> cfg(
        new perfetto::protos::pbzero::JavaHprofConfig::Decoder(
          args.config->java_hprof_config_raw()));

    dump_smaps_ = cfg->dump_smaps();
    for (auto it = cfg->ignored_types(); it; ++it) {
      std::string name = (*it).ToStdString();
      ignored_types_.emplace_back(art::InversePrettyDescriptor(name));
    }
    // This tracing session ID matches the requesting tracing session ID, so we know heapprofd
    // has verified it targets this process.
    enabled_ =
        !is_oome_heap_ || (IsOomeHeapDumpAllowed(*args.config) && IsOomeDumpEnabled(*cfg.get()));
  }

  bool dump_smaps() { return dump_smaps_; }

  // Per-DataSource enable bit. Invoked by the ::Trace method.
  bool enabled() { return enabled_; }

  void OnStart(const StartArgs&) override {
    art::MutexLock lk(art_thread(), GetStateMutex());
    // In case there are multiple tracing sessions waiting for an OOME error,
    // there will be a data source instance for each of them. Before the
    // transition to kStart and signaling the dumping thread, we need to make
    // sure all the data sources are ready.
    if (is_oome_heap_ && g_oome_sessions_pending > 0) {
      --g_oome_sessions_pending;
    }
    if (g_state == State::kWaitForStart) {
      // WriteHeapPackets is responsible for checking whether the DataSource is
      // actually enabled.
      if (!is_oome_heap_ || g_oome_sessions_pending == 0) {
        g_state = State::kStart;
        GetStateCV().Broadcast(art_thread());
      }
    }
  }

  // This datasource can be used with a trace config with a short duration_ms
  // but a long datasource_stop_timeout_ms. In that case, OnStop is (in general)
  // called before the dump is done; we then handle the stop asynchronously and
  // notify the tracing service once we are done.
  // In case OnStop is called after the dump is done (but before the process
  // has exited), we just acknowledge the request.
  void OnStop(const StopArgs& a) override {
    art::MutexLock lk(art_thread(), finish_mutex_);
    if (is_finished_) {
      return;
    }
    is_stopped_ = true;
    async_stop_ = a.HandleStopAsynchronously();
  }

  static art::Thread* art_thread() {
    // TODO(fmayer): Attach the Perfetto producer thread to ART and give it a name. This is
    // not trivial, we cannot just attach the first time this method is called, because
    // AttachCurrentThread deadlocks with the ConditionVariable::Wait in WaitForDataSource.
    //
    // We should attach the thread as soon as the Client API spawns it, but that needs more
    // complicated plumbing.
    return nullptr;
  }

  std::vector<std::string> ignored_types() { return ignored_types_; }

  void Finish() {
    art::MutexLock lk(art_thread(), finish_mutex_);
    if (is_stopped_) {
      async_stop_();
    } else {
      is_finished_ = true;
    }
  }

 private:
  static bool IsOomeDumpEnabled(const perfetto::protos::pbzero::JavaHprofConfig::Decoder& cfg) {
    std::string cmdline;
    if (!android::base::ReadFileToString("/proc/self/cmdline", &cmdline)) {
      return false;
    }
    const char* argv0 = cmdline.c_str();

    for (auto it = cfg.process_cmdline(); it; ++it) {
      std::string pattern = (*it).ToStdString();
      if (fnmatch(pattern.c_str(), argv0, FNM_NOESCAPE) == 0) {
        return true;
      }
    }
    return false;
  }

  bool is_oome_heap_ = false;
  bool enabled_ = false;
  bool dump_smaps_ = false;
  std::vector<std::string> ignored_types_;

  art::Mutex finish_mutex_{"perfetto_hprof_ds_mutex", art::LockLevel::kGenericBottomLock};
  bool is_finished_ = false;
  bool is_stopped_ = false;
  std::function<void()> async_stop_;
};

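// Initializes the Perfetto client API against the system backend and registers
// the data source under `ds_name`.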
void SetupDataSource(const std::string& ds_name, bool is_oome_heap) {
  perfetto::TracingInitArgs args;
  args.backends = perfetto::BackendType::kSystemBackend;
  perfetto::Tracing::Initialize(args);

  perfetto::DataSourceDescriptor dsd;
  dsd.set_name(ds_name);
  dsd.set_will_notify_on_stop(true);
  JavaHprofDataSource::Register(dsd, is_oome_heap);
}

// Waits for the data source's OnStart to be called.
void WaitForDataSource(art::Thread* self) {
  art::MutexLock lk(self, GetStateMutex());
  while (g_state != State::kStart) {
    GetStateCV().Wait(self);
  }
}

// Waits for the data source's OnStart to be called, with a timeout. Returns false on timeout.
bool TimedWaitForDataSource(art::Thread* self, int64_t timeout_ms) {
  const uint64_t cutoff_ns = GetCurrentBootClockNs() + timeout_ms * 1000000;
  art::MutexLock lk(self, GetStateMutex());
  while (g_state != State::kStart) {
    const uint64_t current_ns = GetCurrentBootClockNs();
    if (current_ns >= cutoff_ns) {
      return false;
    }
    GetStateCV().TimedWait(self, (cutoff_ns - current_ns) / 1000000, 0);
  }
  return true;
}

// Helper class to write Java heap dumps to `ctx`. The whole heap dump can be
// split into multiple perfetto.protos.HeapGraph messages, to avoid making each
// message too big.
class Writer {
 public:
  Writer(pid_t pid, JavaHprofDataSource::TraceContext* ctx, uint64_t timestamp)
      : pid_(pid), ctx_(ctx), timestamp_(timestamp),
        last_written_(ctx_->written()) {}

  // Returns whether the next call to GetHeapGraph will create a new TracePacket.
  bool will_create_new_packet() const {
    return !heap_graph_ || ctx_->written() - last_written_ > kPacketSizeThreshold;
  }

  perfetto::protos::pbzero::HeapGraph* GetHeapGraph() {
    if (will_create_new_packet()) {
      CreateNewHeapGraph();
    }
    return heap_graph_;
  }

  void Finalize() {
    if (trace_packet_) {
      trace_packet_->Finalize();
    }
    heap_graph_ = nullptr;
  }

  ~Writer() { Finalize(); }

 private:
  Writer(const Writer&) = delete;
  Writer& operator=(const Writer&) = delete;
  Writer(Writer&&) = delete;
  Writer& operator=(Writer&&) = delete;

  void CreateNewHeapGraph() {
    if (heap_graph_) {
      heap_graph_->set_continued(true);
    }
    Finalize();

    uint64_t written = ctx_->written();

    trace_packet_ = ctx_->NewTracePacket();
    trace_packet_->set_timestamp(timestamp_);
    heap_graph_ = trace_packet_->set_heap_graph();
    heap_graph_->set_pid(pid_);
    heap_graph_->set_index(index_++);

    last_written_ = written;
  }

  const pid_t pid_;
  JavaHprofDataSource::TraceContext* const ctx_;
  const uint64_t timestamp_;

  uint64_t last_written_ = 0;

  perfetto::DataSource<JavaHprofDataSource>::TraceContext::TracePacketHandle
      trace_packet_;
  perfetto::protos::pbzero::HeapGraph* heap_graph_ = nullptr;

  uint64_t index_ = 0;
};

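// Reference visitor that records, for a single object, the (field name,
// referred object) pairs it holds.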
class ReferredObjectsFinder {
 public:
  explicit ReferredObjectsFinder(
      std::vector<std::pair<std::string, art::mirror::Object*>>* referred_objects,
      bool emit_field_ids)
      : referred_objects_(referred_objects), emit_field_ids_(emit_field_ids) {}

  // For art::mirror::Object::VisitReferences.
  void operator()(art::ObjPtr<art::mirror::Object> obj, art::MemberOffset offset,
                  bool is_static) const
      REQUIRES_SHARED(art::Locks::mutator_lock_) {
    if (offset.Uint32Value() == art::mirror::Object::ClassOffset().Uint32Value()) {
      // Skip shadow$klass pointer.
      return;
    }
    art::mirror::Object* ref = obj->GetFieldObject<art::mirror::Object>(offset);
    art::ArtField* field;
    if (is_static) {
      field = art::ArtField::FindStaticFieldWithOffset(obj->AsClass(), offset.Uint32Value());
    } else {
      field = art::ArtField::FindInstanceFieldWithOffset(obj->GetClass(), offset.Uint32Value());
    }
    std::string field_name = "";
    if (field != nullptr && emit_field_ids_) {
      field_name = field->PrettyField(/*with_type=*/true);
    }
    referred_objects_->emplace_back(std::move(field_name), ref);
  }

  void VisitRootIfNonNull(
      [[maybe_unused]] art::mirror::CompressedReference<art::mirror::Object>* root) const {}
  void VisitRoot(
      [[maybe_unused]] art::mirror::CompressedReference<art::mirror::Object>* root) const {}

 private:
  // We can use a raw Object* pointer here, because there are no concurrent GC threads after the
  // fork.
  std::vector<std::pair<std::string, art::mirror::Object*>>* referred_objects_;
  // Prettifying field names is expensive; avoid if field name will not be used.
  bool emit_field_ids_;
};

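// Root visitor that collects all GC roots, grouped by their art::RootType.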
class RootFinder : public art::SingleRootVisitor {
 public:
  explicit RootFinder(
    std::map<art::RootType, std::vector<art::mirror::Object*>>* root_objects)
      : root_objects_(root_objects) {}

  void VisitRoot(art::mirror::Object* root, const art::RootInfo& info) override {
    (*root_objects_)[info.GetType()].emplace_back(root);
  }

 private:
  // We can use a raw Object* pointer here, because there are no concurrent GC threads after the
  // fork.
  std::map<art::RootType, std::vector<art::mirror::Object*>>* root_objects_;
};

perfetto::protos::pbzero::HeapGraphRoot::Type ToProtoType(art::RootType art_type) {
  using perfetto::protos::pbzero::HeapGraphRoot;
  switch (art_type) {
    case art::kRootUnknown:
      return HeapGraphRoot::ROOT_UNKNOWN;
    case art::kRootJNIGlobal:
      return HeapGraphRoot::ROOT_JNI_GLOBAL;
    case art::kRootJNILocal:
      return HeapGraphRoot::ROOT_JNI_LOCAL;
    case art::kRootJavaFrame:
      return HeapGraphRoot::ROOT_JAVA_FRAME;
    case art::kRootNativeStack:
      return HeapGraphRoot::ROOT_NATIVE_STACK;
    case art::kRootStickyClass:
      return HeapGraphRoot::ROOT_STICKY_CLASS;
    case art::kRootThreadBlock:
      return HeapGraphRoot::ROOT_THREAD_BLOCK;
    case art::kRootMonitorUsed:
      return HeapGraphRoot::ROOT_MONITOR_USED;
    case art::kRootThreadObject:
      return HeapGraphRoot::ROOT_THREAD_OBJECT;
    case art::kRootInternedString:
      return HeapGraphRoot::ROOT_INTERNED_STRING;
    case art::kRootFinalizing:
      return HeapGraphRoot::ROOT_FINALIZING;
    case art::kRootDebugger:
      return HeapGraphRoot::ROOT_DEBUGGER;
    case art::kRootReferenceCleanup:
      return HeapGraphRoot::ROOT_REFERENCE_CLEANUP;
    case art::kRootVMInternal:
      return HeapGraphRoot::ROOT_VM_INTERNAL;
    case art::kRootJNIMonitor:
      return HeapGraphRoot::ROOT_JNI_MONITOR;
  }
}

perfetto::protos::pbzero::HeapGraphType::Kind ProtoClassKind(uint32_t class_flags) {
  using perfetto::protos::pbzero::HeapGraphType;
  switch (class_flags) {
    case art::mirror::kClassFlagNormal:
    case art::mirror::kClassFlagRecord:
      return HeapGraphType::KIND_NORMAL;
    case art::mirror::kClassFlagNoReferenceFields:
    case art::mirror::kClassFlagNoReferenceFields | art::mirror::kClassFlagRecord:
      return HeapGraphType::KIND_NOREFERENCES;
    case art::mirror::kClassFlagString | art::mirror::kClassFlagNoReferenceFields:
      return HeapGraphType::KIND_STRING;
    case art::mirror::kClassFlagObjectArray:
      return HeapGraphType::KIND_ARRAY;
    case art::mirror::kClassFlagClass:
      return HeapGraphType::KIND_CLASS;
    case art::mirror::kClassFlagClassLoader:
      return HeapGraphType::KIND_CLASSLOADER;
    case art::mirror::kClassFlagDexCache:
      return HeapGraphType::KIND_DEXCACHE;
    case art::mirror::kClassFlagSoftReference:
      return HeapGraphType::KIND_SOFT_REFERENCE;
    case art::mirror::kClassFlagWeakReference:
      return HeapGraphType::KIND_WEAK_REFERENCE;
    case art::mirror::kClassFlagFinalizerReference:
      return HeapGraphType::KIND_FINALIZER_REFERENCE;
    case art::mirror::kClassFlagPhantomReference:
      return HeapGraphType::KIND_PHANTOM_REFERENCE;
    default:
      return HeapGraphType::KIND_UNKNOWN;
  }
}

std::string PrettyType(art::mirror::Class* klass) NO_THREAD_SAFETY_ANALYSIS {
  if (klass == nullptr) {
    return "(raw)";
  }
  std::string temp;
  std::string result(art::PrettyDescriptor(klass->GetDescriptor(&temp)));
  return result;
}

void DumpSmaps(JavaHprofDataSource::TraceContext* ctx) {
  FILE* smaps = fopen("/proc/self/smaps", "re");
  if (smaps != nullptr) {
    auto trace_packet = ctx->NewTracePacket();
    auto* smaps_packet = trace_packet->set_smaps_packet();
    smaps_packet->set_pid(getpid());
    perfetto::profiling::ParseSmaps(smaps,
        [&smaps_packet](const perfetto::profiling::SmapsEntry& e) {
      if (ShouldSampleSmapsEntry(e)) {
        auto* smaps_entry = smaps_packet->add_entries();
        smaps_entry->set_path(e.pathname);
        smaps_entry->set_size_kb(e.size_kb);
        smaps_entry->set_private_dirty_kb(e.private_dirty_kb);
        smaps_entry->set_swap_kb(e.swap_kb);
      }
    });
    fclose(smaps);
  } else {
    PLOG(ERROR) << "failed to open smaps";
  }
}

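// Compresses an object pointer into a proto object id: pointers are divided by
// the object alignment so ids encode as smaller varints; nullptr maps to 0.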
uint64_t GetObjectId(const art::mirror::Object* obj) {
  return reinterpret_cast<uint64_t>(obj) / std::alignment_of<art::mirror::Object>::value;
}

template <typename F>
void ForInstanceReferenceField(art::mirror::Class* klass, F fn) NO_THREAD_SAFETY_ANALYSIS {
  for (art::ArtField& af : klass->GetIFields()) {
    if (af.IsPrimitiveType() ||
        af.GetOffset().Uint32Value() == art::mirror::Object::ClassOffset().Uint32Value()) {
      continue;
    }
    fn(af.GetOffset());
  }
}

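// Returns the number of bytes needed to encode `n` as a protobuf varint
// (7 payload bits per byte), e.g. EncodedSize(127) == 1 and EncodedSize(128) == 2.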
size_t EncodedSize(uint64_t n) {
  if (n == 0) return 1;
  return 1 + static_cast<size_t>(art::MostSignificantBit(n)) / 7;
}

// Returns all the references that `*obj` (an object of type `*klass`) is holding.
std::vector<std::pair<std::string, art::mirror::Object*>> GetReferences(art::mirror::Object* obj,
                                                                        art::mirror::Class* klass,
                                                                        bool emit_field_ids)
    REQUIRES_SHARED(art::Locks::mutator_lock_) {
  std::vector<std::pair<std::string, art::mirror::Object*>> referred_objects;
  ReferredObjectsFinder objf(&referred_objects, emit_field_ids);

  uint32_t klass_flags = klass->GetClassFlags();
  if (klass_flags != art::mirror::kClassFlagNormal &&
      klass_flags != art::mirror::kClassFlagSoftReference &&
      klass_flags != art::mirror::kClassFlagWeakReference &&
      klass_flags != art::mirror::kClassFlagFinalizerReference &&
      klass_flags != art::mirror::kClassFlagPhantomReference) {
    obj->VisitReferences(objf, art::VoidFunctor());
  } else {
    for (art::mirror::Class* cls = klass; cls != nullptr; cls = cls->GetSuperClass().Ptr()) {
      ForInstanceReferenceField(cls,
                                [obj, objf](art::MemberOffset offset) NO_THREAD_SAFETY_ANALYSIS {
                                  objf(art::ObjPtr<art::mirror::Object>(obj),
                                       offset,
                                       /*is_static=*/false);
                                });
    }
  }
  return referred_objects;
}

// Returns the base for delta encoding all the `referred_objects`. If delta
// encoding would waste space, returns 0.
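// For example, with referred object ids {0x40000000, 0x40000004} the base is
// 0x3fffffff and the stored deltas are {1, 5}; null references keep id 0, which
// stays distinguishable because the base is decremented by one below.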
uint64_t EncodeBaseObjId(
    const std::vector<std::pair<std::string, art::mirror::Object*>>& referred_objects,
    const art::mirror::Object* min_nonnull_ptr) REQUIRES_SHARED(art::Locks::mutator_lock_) {
  uint64_t base_obj_id = GetObjectId(min_nonnull_ptr);
  if (base_obj_id <= 1) {
    return 0;
  }

  // We need to decrement the base for object ids so that we can tell apart
  // null references.
  base_obj_id--;
  uint64_t bytes_saved = 0;
  for (const auto& p : referred_objects) {
    art::mirror::Object* referred_obj = p.second;
    if (!referred_obj) {
      continue;
    }
    uint64_t referred_obj_id = GetObjectId(referred_obj);
    bytes_saved += EncodedSize(referred_obj_id) - EncodedSize(referred_obj_id - base_obj_id);
  }

  // +1 for storing the field id.
  if (bytes_saved <= EncodedSize(base_obj_id) + 1) {
    // Subtracting the base ptr gains fewer bytes than it takes to store it.
    return 0;
  }
  return base_obj_id;
}

// Helper to keep intermediate state while dumping objects and classes from ART into
// perfetto.protos.HeapGraph.
class HeapGraphDumper {
 public:
  // Instances of classes whose name is in `ignored_types` will be ignored.
  explicit HeapGraphDumper(const std::vector<std::string>& ignored_types)
      : ignored_types_(ignored_types),
        reference_field_ids_(std::make_unique<protozero::PackedVarInt>()),
        reference_object_ids_(std::make_unique<protozero::PackedVarInt>()) {}

  // Dumps a heap graph from `*runtime` and writes it to `writer`.
  void Dump(art::Runtime* runtime, Writer& writer) REQUIRES(art::Locks::mutator_lock_) {
    DumpRootObjects(runtime, writer);

    DumpObjects(runtime, writer);

    WriteInternedData(writer);
  }

 private:
  // Dumps the root objects from `*runtime` to `writer`.
  void DumpRootObjects(art::Runtime* runtime, Writer& writer)
      REQUIRES_SHARED(art::Locks::mutator_lock_) {
    std::map<art::RootType, std::vector<art::mirror::Object*>> root_objects;
    RootFinder rcf(&root_objects);
    runtime->VisitRoots(&rcf);
    std::unique_ptr<protozero::PackedVarInt> object_ids(new protozero::PackedVarInt);
    for (const auto& p : root_objects) {
      const art::RootType root_type = p.first;
      const std::vector<art::mirror::Object*>& children = p.second;
      perfetto::protos::pbzero::HeapGraphRoot* root_proto = writer.GetHeapGraph()->add_roots();
      root_proto->set_root_type(ToProtoType(root_type));
      for (art::mirror::Object* obj : children) {
        if (writer.will_create_new_packet()) {
          root_proto->set_object_ids(*object_ids);
          object_ids->Reset();
          root_proto = writer.GetHeapGraph()->add_roots();
          root_proto->set_root_type(ToProtoType(root_type));
        }
        object_ids->Append(GetObjectId(obj));
      }
      root_proto->set_object_ids(*object_ids);
      object_ids->Reset();
    }
  }

  // Dumps all the objects from `*runtime` to `writer`.
  void DumpObjects(art::Runtime* runtime, Writer& writer) REQUIRES(art::Locks::mutator_lock_) {
    runtime->GetHeap()->VisitObjectsPaused(
        [this, &writer](art::mirror::Object* obj)
            REQUIRES_SHARED(art::Locks::mutator_lock_) { WriteOneObject(obj, writer); });
  }

  // Writes all the previously accumulated (while dumping objects and roots) interned data to
  // `writer`.
  void WriteInternedData(Writer& writer) {
    for (const auto& p : interned_locations_) {
      const std::string& str = p.first;
      uint64_t id = p.second;

      perfetto::protos::pbzero::InternedString* location_proto =
          writer.GetHeapGraph()->add_location_names();
      location_proto->set_iid(id);
      location_proto->set_str(reinterpret_cast<const uint8_t*>(str.c_str()), str.size());
    }
    for (const auto& p : interned_fields_) {
      const std::string& str = p.first;
      uint64_t id = p.second;

      perfetto::protos::pbzero::InternedString* field_proto =
          writer.GetHeapGraph()->add_field_names();
      field_proto->set_iid(id);
      field_proto->set_str(reinterpret_cast<const uint8_t*>(str.c_str()), str.size());
    }
  }

  // Writes `*obj` into `writer`.
  void WriteOneObject(art::mirror::Object* obj, Writer& writer)
      REQUIRES_SHARED(art::Locks::mutator_lock_) {
    if (obj->IsClass()) {
      WriteClass(obj->AsClass().Ptr(), writer);
    }

    art::mirror::Class* klass = obj->GetClass();
    uintptr_t class_ptr = reinterpret_cast<uintptr_t>(klass);
    // We need to synthesize a new type for Class<Foo>, which does not exist
    // in the runtime. Otherwise, all the static members of all classes would be
    // attributed to java.lang.Class.
    if (klass->IsClassClass()) {
      class_ptr = WriteSyntheticClassFromObj(obj, writer);
    }

    if (IsIgnored(obj)) {
      return;
    }

    auto class_id = FindOrAppend(&interned_classes_, class_ptr);

    uint64_t object_id = GetObjectId(obj);
    perfetto::protos::pbzero::HeapGraphObject* object_proto = writer.GetHeapGraph()->add_objects();
    if (prev_object_id_ && prev_object_id_ < object_id) {
      object_proto->set_id_delta(object_id - prev_object_id_);
    } else {
      object_proto->set_id(object_id);
    }
    prev_object_id_ = object_id;
    object_proto->set_type_id(class_id);

    // Arrays / strings are magic and have an instance-dependent size.
    if (obj->SizeOf() != klass->GetObjectSize()) {
      object_proto->set_self_size(obj->SizeOf());
    }

    const art::gc::Heap* heap = art::Runtime::Current()->GetHeap();
    const auto* space = heap->FindContinuousSpaceFromObject(obj, /*fail_ok=*/true);
    auto heap_type = perfetto::protos::pbzero::HeapGraphObject::HEAP_TYPE_APP;
    if (space != nullptr) {
      if (space->IsZygoteSpace()) {
        heap_type = perfetto::protos::pbzero::HeapGraphObject::HEAP_TYPE_ZYGOTE;
      } else if (space->IsImageSpace() && heap->ObjectIsInBootImageSpace(obj)) {
        heap_type = perfetto::protos::pbzero::HeapGraphObject::HEAP_TYPE_BOOT_IMAGE;
      }
    } else {
      const auto* los = heap->GetLargeObjectsSpace();
      if (los->Contains(obj) && los->IsZygoteLargeObject(art::Thread::Current(), obj)) {
        heap_type = perfetto::protos::pbzero::HeapGraphObject::HEAP_TYPE_ZYGOTE;
      }
    }
    if (heap_type != prev_heap_type_) {
      object_proto->set_heap_type_delta(heap_type);
      prev_heap_type_ = heap_type;
    }

    FillReferences(obj, klass, object_proto);

    FillFieldValues(obj, klass, object_proto);
  }

  // Writes `*klass` into `writer`.
  void WriteClass(art::mirror::Class* klass, Writer& writer)
      REQUIRES_SHARED(art::Locks::mutator_lock_) {
    perfetto::protos::pbzero::HeapGraphType* type_proto = writer.GetHeapGraph()->add_types();
    type_proto->set_id(FindOrAppend(&interned_classes_, reinterpret_cast<uintptr_t>(klass)));
    type_proto->set_class_name(PrettyType(klass));
    type_proto->set_location_id(FindOrAppend(&interned_locations_, klass->GetLocation()));
    type_proto->set_object_size(klass->GetObjectSize());
    type_proto->set_kind(ProtoClassKind(klass->GetClassFlags()));
    type_proto->set_classloader_id(GetObjectId(klass->GetClassLoader().Ptr()));
    if (klass->GetSuperClass().Ptr()) {
      type_proto->set_superclass_id(FindOrAppend(
          &interned_classes_, reinterpret_cast<uintptr_t>(klass->GetSuperClass().Ptr())));
    }
    ForInstanceReferenceField(
        klass, [klass, this](art::MemberOffset offset) NO_THREAD_SAFETY_ANALYSIS {
          auto art_field = art::ArtField::FindInstanceFieldWithOffset(klass, offset.Uint32Value());
          reference_field_ids_->Append(
              FindOrAppend(&interned_fields_, art_field->PrettyField(true)));
        });
    type_proto->set_reference_field_id(*reference_field_ids_);
    reference_field_ids_->Reset();
  }

  // Writes into `writer` a synthetic class that represents a type used only by `*obj`
  // (a class object), and returns the fake class pointer that identifies it.
  uintptr_t WriteSyntheticClassFromObj(art::mirror::Object* obj, Writer& writer)
      REQUIRES_SHARED(art::Locks::mutator_lock_) {
    CHECK(obj->IsClass());
    perfetto::protos::pbzero::HeapGraphType* type_proto = writer.GetHeapGraph()->add_types();
    // All pointers are at least multiples of two, so this way we can make sure
    // we are not colliding with a real class.
    uintptr_t class_ptr = reinterpret_cast<uintptr_t>(obj) | 1;
    auto class_id = FindOrAppend(&interned_classes_, class_ptr);
    type_proto->set_id(class_id);
    type_proto->set_class_name(obj->PrettyTypeOf());
    type_proto->set_location_id(FindOrAppend(&interned_locations_, obj->AsClass()->GetLocation()));
    return class_ptr;
  }

  // Fills `*object_proto` with all the references held by `*obj` (an object of type `*klass`).
  void FillReferences(art::mirror::Object* obj,
                      art::mirror::Class* klass,
                      perfetto::protos::pbzero::HeapGraphObject* object_proto)
      REQUIRES_SHARED(art::Locks::mutator_lock_) {
    const uint32_t klass_flags = klass->GetClassFlags();
    const bool emit_field_ids = klass_flags != art::mirror::kClassFlagObjectArray &&
                                klass_flags != art::mirror::kClassFlagNormal &&
                                klass_flags != art::mirror::kClassFlagSoftReference &&
                                klass_flags != art::mirror::kClassFlagWeakReference &&
                                klass_flags != art::mirror::kClassFlagFinalizerReference &&
                                klass_flags != art::mirror::kClassFlagPhantomReference;
    std::vector<std::pair<std::string, art::mirror::Object*>> referred_objects =
        GetReferences(obj, klass, emit_field_ids);

    art::mirror::Object* min_nonnull_ptr = FilterIgnoredReferencesAndFindMin(referred_objects);

    uint64_t base_obj_id = EncodeBaseObjId(referred_objects, min_nonnull_ptr);

    for (const auto& p : referred_objects) {
      const std::string& field_name = p.first;
      art::mirror::Object* referred_obj = p.second;
      if (emit_field_ids) {
        reference_field_ids_->Append(FindOrAppend(&interned_fields_, field_name));
      }
      uint64_t referred_obj_id = GetObjectId(referred_obj);
      if (referred_obj_id) {
        referred_obj_id -= base_obj_id;
      }
      reference_object_ids_->Append(referred_obj_id);
    }
    if (emit_field_ids) {
      object_proto->set_reference_field_id(*reference_field_ids_);
      reference_field_ids_->Reset();
    }
    if (base_obj_id) {
      // The field is called `reference_field_id_base`, but it has always been used as a base for
      // `reference_object_id`. It should be called `reference_object_id_base`.
      object_proto->set_reference_field_id_base(base_obj_id);
    }
    object_proto->set_reference_object_id(*reference_object_ids_);
    reference_object_ids_->Reset();
  }

  // Iterates all the `referred_objects` and sets all the objects that are supposed to be ignored
  // to nullptr. Returns the object with the smallest address (ignoring nullptr).
  art::mirror::Object* FilterIgnoredReferencesAndFindMin(
      std::vector<std::pair<std::string, art::mirror::Object*>>& referred_objects) const
      REQUIRES_SHARED(art::Locks::mutator_lock_) {
    art::mirror::Object* min_nonnull_ptr = nullptr;
    for (auto& p : referred_objects) {
      art::mirror::Object*& referred_obj = p.second;
      if (referred_obj == nullptr)
        continue;
      if (IsIgnored(referred_obj)) {
        referred_obj = nullptr;
        continue;
      }
      if (min_nonnull_ptr == nullptr || min_nonnull_ptr > referred_obj) {
        min_nonnull_ptr = referred_obj;
      }
    }
    return min_nonnull_ptr;
  }

  // Fills `*object_proto` with the value of a subset of potentially interesting fields of `*obj`
  // (an object of type `*klass`).
  void FillFieldValues(art::mirror::Object* obj,
                       art::mirror::Class* klass,
                       perfetto::protos::pbzero::HeapGraphObject* object_proto) const
      REQUIRES_SHARED(art::Locks::mutator_lock_) {
    if (obj->IsClass() || klass->IsClassClass()) {
      return;
    }

    for (art::mirror::Class* cls = klass; cls != nullptr; cls = cls->GetSuperClass().Ptr()) {
      if (cls->IsArrayClass()) {
        continue;
      }

      if (cls->DescriptorEquals("Llibcore/util/NativeAllocationRegistry;")) {
        art::ArtField* af = cls->FindDeclaredInstanceField(
            "size", art::Primitive::Descriptor(art::Primitive::kPrimLong));
        if (af) {
          object_proto->set_native_allocation_registry_size_field(af->GetLong(obj));
        }
      }
    }
  }

  // Returns true if `*obj` has a type that's supposed to be ignored.
  bool IsIgnored(art::mirror::Object* obj) const REQUIRES_SHARED(art::Locks::mutator_lock_) {
    if (obj->IsClass()) {
      return false;
    }
    art::mirror::Class* klass = obj->GetClass();
    std::string temp;
    std::string_view name(klass->GetDescriptor(&temp));
    return std::find(ignored_types_.begin(), ignored_types_.end(), name) != ignored_types_.end();
  }

  // Name of classes whose instances should be ignored.
  const std::vector<std::string> ignored_types_;

  // Make sure that intern ID 0 (default proto value for a uint64_t) always maps to ""
  // (default proto value for a string) or to 0 (default proto value for a uint64).

  // Map from string (the field name) to its index in perfetto.protos.HeapGraph.field_names
  std::map<std::string, uint64_t> interned_fields_{{"", 0}};
  // Map from string (the location name) to its index in perfetto.protos.HeapGraph.location_names
  std::map<std::string, uint64_t> interned_locations_{{"", 0}};
  // Map from addr (the class pointer) to its id in perfetto.protos.HeapGraph.types
  std::map<uintptr_t, uint64_t> interned_classes_{{0, 0}};

  // Temporary buffers: used locally in some methods and then cleared.
  std::unique_ptr<protozero::PackedVarInt> reference_field_ids_;
  std::unique_ptr<protozero::PackedVarInt> reference_object_ids_;

  // Id of the previous object that was dumped. Used for delta encoding.
  uint64_t prev_object_id_ = 0;
  // Heap type of the previous object that was dumped. Used for delta encoding.
  perfetto::protos::pbzero::HeapGraphObject::HeapType prev_heap_type_ =
      perfetto::protos::pbzero::HeapGraphObject::HEAP_TYPE_UNKNOWN;
};

// waitpid with a timeout implemented by ~busy-waiting
// See b/181031512 for rationale.
void BusyWaitpid(pid_t pid, uint32_t timeout_ms) {
  for (size_t i = 0;; ++i) {
    if (i == timeout_ms) {
      // The child hasn't exited.
      // Give up and SIGKILL it. The next waitpid should succeed.
      LOG(ERROR) << "perfetto_hprof child timed out. Sending SIGKILL.";
      kill(pid, SIGKILL);
    }
    int stat_loc;
    pid_t wait_result = waitpid(pid, &stat_loc, WNOHANG);
    if (wait_result == -1 && errno != EINTR) {
      if (errno != ECHILD) {
        // This hopefully never happens (should only be EINVAL).
        PLOG(FATAL_WITHOUT_ABORT) << "waitpid";
      }
      // If we get ECHILD, the parent process was handling SIGCHLD, or did a wildcard wait.
      // The child is no longer here either way, so that's good enough for us.
      break;
    } else if (wait_result > 0) {
      break;
    } else {  // wait_result == 0 || errno == EINTR.
      usleep(1000);
    }
  }
}

enum class ResumeParentPolicy {
  IMMEDIATELY,
  DEFERRED
};

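// fork()s while the calling thread holds thread_list_lock_, so that no other
// thread can hold it at fork time and the child cannot block forever on a lock
// that was left permanently held.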
pid_t ForkUnderThreadListLock(art::Thread* self) {
  art::MutexLock lk(self, *art::Locks::thread_list_lock_);
  return fork();
}

void ForkAndRun(art::Thread* self,
                ResumeParentPolicy resume_parent_policy,
                const std::function<void(pid_t child)>& parent_runnable,
                const std::function<void(pid_t parent, uint64_t timestamp)>& child_runnable) {
  pid_t parent_pid = getpid();
  LOG(INFO) << "forking for " << parent_pid;
  // Need to take a heap dump while GC isn't running. See the comment in
  // Heap::VisitObjects(). Also we need the critical section to avoid visiting
  // the same object twice. See b/34967844.
  //
  // We need to do this before the fork, because otherwise it can deadlock
  // waiting for the GC, as all other threads get terminated by the clone, but
  // their locks are not released.
  // We must also avoid any logd logging actions on the forked process; art LogdLoggerLocked
  // serializes logging from different threads via a mutex.
  // This does not perfectly solve all fork-related issues, as there could still be threads that
  // are unaffected by ScopedSuspendAll and in a non-fork-friendly situation
  // (e.g. inside a malloc holding a lock). This situation is quite rare, and in that case we will
  // hit the watchdog in the grand-child process if it gets stuck.
  std::optional<art::gc::ScopedGCCriticalSection> gcs(std::in_place, self, art::gc::kGcCauseHprof,
                                                      art::gc::kCollectorTypeHprof);

  std::optional<art::ScopedSuspendAll> ssa(std::in_place, __FUNCTION__, /* long_suspend=*/ true);

  // Optimistically get the thread_list_lock_ to avoid the child process deadlocking
  pid_t pid = ForkUnderThreadListLock(self);
  if (pid == -1) {
    // Fork error.
    PLOG(ERROR) << "fork";
    return;
  }
  if (pid != 0) {
    // Parent
    if (resume_parent_policy == ResumeParentPolicy::IMMEDIATELY) {
      // Stop the thread suspension as soon as possible to allow the rest of the application to
      // continue while we waitpid here.
      ssa.reset();
      gcs.reset();
    }
    parent_runnable(pid);
    if (resume_parent_policy != ResumeParentPolicy::IMMEDIATELY) {
      ssa.reset();
      gcs.reset();
    }
    return;
  }
  // The following code is only executed by the child of the original process.
  // Uninstall signal handler, so we don't trigger a profile on it.
  if (sigaction(kJavaHeapprofdSignal, &g_orig_act, nullptr) != 0) {
    close(g_signal_pipe_fds[0]);
    close(g_signal_pipe_fds[1]);
    PLOG(FATAL) << "Failed to sigaction";
    return;
  }

  uint64_t ts = GetCurrentBootClockNs();
  child_runnable(parent_pid, ts);
  // Prevent the `atexit` handlers from running. We do not want to call cleanup
  // functions the parent process has registered.
  art::FastExit(0);
}

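// Writes the heap graph (and, if requested, smaps) of `parent_pid` into the
// tracing session, then flushes and waits for the flush to complete. Runs in
// the forked dumping process.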
void WriteHeapPackets(pid_t parent_pid, uint64_t timestamp) {
  JavaHprofDataSource::Trace(
      [parent_pid, timestamp](JavaHprofDataSource::TraceContext ctx)
          NO_THREAD_SAFETY_ANALYSIS {
            bool dump_smaps;
            std::vector<std::string> ignored_types;
            {
              auto ds = ctx.GetDataSourceLocked();
              if (!ds || !ds->enabled()) {
                if (ds) ds->Finish();
                LOG(INFO) << "skipping irrelevant data source.";
                return;
              }
              dump_smaps = ds->dump_smaps();
              ignored_types = ds->ignored_types();
            }
            art::ScopedTrace trace("ART heap dump for " + std::to_string(parent_pid));
            if (dump_smaps) {
              DumpSmaps(&ctx);
            }
            Writer writer(parent_pid, &ctx, timestamp);
            HeapGraphDumper dumper(ignored_types);

            dumper.Dump(art::Runtime::Current(), writer);

            writer.Finalize();
            ctx.Flush([] {
              art::MutexLock lk(JavaHprofDataSource::art_thread(), GetStateMutex());
              g_state = State::kEnd;
              GetStateCV().Broadcast(JavaHprofDataSource::art_thread());
            });
            // Wait for the Flush that will happen on the Perfetto thread.
            {
              art::MutexLock lk(JavaHprofDataSource::art_thread(), GetStateMutex());
              while (g_state != State::kEnd) {
                GetStateCV().Wait(JavaHprofDataSource::art_thread());
              }
            }
            {
              auto ds = ctx.GetDataSourceLocked();
              if (ds) {
                ds->Finish();
              } else {
                LOG(ERROR) << "datasource timed out (duration_ms + datasource_stop_timeout_ms) "
                              "before dump finished";
              }
            }
          });
}

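// Handles an on-demand heap dump request: forks, resumes the app immediately,
// and lets a daemonized grand-child write the heap dump.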
void DumpPerfetto(art::Thread* self) {
  ForkAndRun(
    self,
    ResumeParentPolicy::IMMEDIATELY,
    // parent thread
    [](pid_t child) {
      // Busy waiting here will introduce some extra latency, but that is okay because we have
      // already unsuspended all other threads. This runs on the perfetto_hprof_listener, which
      // is not needed for progress of the app itself.
      // We daemonize the child process, so effectively we only need to wait
      // for it to fork and exit.
      BusyWaitpid(child, 1000);
    },
    // child thread
    [self](pid_t dumped_pid, uint64_t timestamp) {
      // Daemon creates a new process that is the grand-child of the original process, and exits.
      if (daemon(0, 0) == -1) {
        PLOG(FATAL) << "daemon";
      }
      // The following code is only executed by the grand-child of the original process.

      // Make sure that this is the first thing we do after forking, so that if anything
      // below hangs, the watchdog will kill the forked process.
      ArmWatchdogOrDie();
      SetupDataSource("android.java_hprof", false);
      WaitForDataSource(self);
      WriteHeapPackets(dumped_pid, timestamp);
      LOG(INFO) << "finished dumping heap for " << dumped_pid;
    });
}

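// OutOfMemoryError hook: forks a dumping process and keeps the crashing parent
// suspended until the dump is complete (ResumeParentPolicy::DEFERRED).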
void DumpPerfettoOutOfMemory() REQUIRES_SHARED(art::Locks::mutator_lock_) {
  art::Thread* self = art::Thread::Current();
  if (!self) {
    LOG(FATAL_WITHOUT_ABORT) << "no thread in DumpPerfettoOutOfMemory";
    return;
  }

  // Ensure that there is an active, armed tracing session.
  uint32_t session_cnt =
      android::base::GetUintProperty<uint32_t>("traced.oome_heap_session.count", 0);
  if (session_cnt == 0) {
    return;
  }
  {
    // OutOfMemoryErrors are reentrant, make sure we do not fork and process
    // more than once.
    art::MutexLock lk(self, GetStateMutex());
    if (g_oome_triggered) {
      return;
    }
    g_oome_triggered = true;
    g_oome_sessions_pending = session_cnt;
  }

  art::ScopedThreadSuspension sts(self, art::ThreadState::kSuspended);
  // If we fork & resume the original process execution, it will most likely exit
  // ~immediately due to the OOME error thrown. When the system detects that, it
  // will clean up by killing all processes in the cgroup (including the process
  // we just forked).
  // We need to avoid the race between the heap dump and the process group
  // cleanup, and the only way to do this is to avoid resuming the original
  // process until the heap dump is complete.
  // Given we are already about to crash anyway, the diagnostic data we get
  // outweighs the cost of introducing some latency.
  ForkAndRun(
    self,
    ResumeParentPolicy::DEFERRED,
    // parent process
    [](pid_t child) {
      // waitpid to reap the zombie
      // we are explicitly waiting for the child to exit
      // The reason for the timeout on top of the watchdog is that it is
      // possible (albeit unlikely) that even the watchdog will fail to be
      // activated in the case of an atfork handler.
      BusyWaitpid(child, kWatchdogTimeoutSec * 1000);
    },
    // child process
    [self](pid_t dumped_pid, uint64_t timestamp) {
      ArmWatchdogOrDie();
      art::SetThreadName("perfetto_oome_hprof");
      art::ScopedTrace trace("perfetto_hprof oome");
      SetupDataSource("android.java_hprof.oom", true);
      perfetto::Tracing::ActivateTriggers({"com.android.telemetry.art-outofmemory"}, 500);

      // A pre-armed tracing session might not exist, so we should wait for a
      // limited amount of time before we decide to let the execution continue.
      if (!TimedWaitForDataSource(self, 1000)) {
        LOG(INFO) << "OOME hprof timeout (state " << g_state << ")";
        return;
      }
      WriteHeapPackets(dumped_pid, timestamp);
      LOG(INFO) << "OOME hprof complete for " << dumped_pid;
    });
}

// The plugin initialization function.
extern "C" bool ArtPlugin_Initialize() {
  if (art::Runtime::Current() == nullptr) {
    return false;
  }
  art::Thread* self = art::Thread::Current();
  {
    art::MutexLock lk(self, GetStateMutex());
    if (g_state != State::kUninitialized) {
      LOG(ERROR) << "perfetto_hprof already initialized. state: " << g_state;
      return false;
    }
    g_state = State::kWaitForListener;
  }

  if (pipe2(g_signal_pipe_fds, O_CLOEXEC) == -1) {
    PLOG(ERROR) << "Failed to pipe";
    return false;
  }

  struct sigaction act = {};
  act.sa_flags = SA_SIGINFO | SA_RESTART;
  act.sa_sigaction = [](int, siginfo_t* si, void*) {
    requested_tracing_session_id = si->si_value.sival_int;
    if (write(g_signal_pipe_fds[1], kByte, sizeof(kByte)) == -1) {
      PLOG(ERROR) << "Failed to trigger heap dump";
    }
  };

  // TODO(fmayer): We can probably use the SignalCatcher thread here to not
  // have an idle thread.
  if (sigaction(kJavaHeapprofdSignal, &act, &g_orig_act) != 0) {
    close(g_signal_pipe_fds[0]);
    close(g_signal_pipe_fds[1]);
    PLOG(ERROR) << "Failed to sigaction";
    return false;
  }

  std::thread th([] {
    art::Runtime* runtime = art::Runtime::Current();
    if (!runtime) {
      LOG(FATAL_WITHOUT_ABORT) << "no runtime in perfetto_hprof_listener";
      return;
    }
    if (!runtime->AttachCurrentThread("perfetto_hprof_listener", /*as_daemon=*/ true,
                                      runtime->GetSystemThreadGroup(), /*create_peer=*/ false)) {
      LOG(ERROR) << "failed to attach thread.";
      {
        art::MutexLock lk(nullptr, GetStateMutex());
        g_state = State::kUninitialized;
        GetStateCV().Broadcast(nullptr);
      }

      return;
    }
    art::Thread* self = art::Thread::Current();
    if (!self) {
      LOG(FATAL_WITHOUT_ABORT) << "no thread in perfetto_hprof_listener";
      return;
    }
    {
      art::MutexLock lk(self, GetStateMutex());
      if (g_state == State::kWaitForListener) {
        g_state = State::kWaitForStart;
        GetStateCV().Broadcast(self);
      }
    }
    char buf[1];
    for (;;) {
      int res;
      do {
        res = read(g_signal_pipe_fds[0], buf, sizeof(buf));
      } while (res == -1 && errno == EINTR);

      if (res <= 0) {
        if (res == -1) {
          PLOG(ERROR) << "failed to read";
        }
        close(g_signal_pipe_fds[0]);
        return;
      }

      perfetto_hprof::DumpPerfetto(self);
    }
  });
  th.detach();

  // Register the OOM error handler.
  art::Runtime::Current()->SetOutOfMemoryErrorHook(perfetto_hprof::DumpPerfettoOutOfMemory);

  return true;
}

extern "C" bool ArtPlugin_Deinitialize() {
  art::Runtime::Current()->SetOutOfMemoryErrorHook(nullptr);

  if (sigaction(kJavaHeapprofdSignal, &g_orig_act, nullptr) != 0) {
    PLOG(ERROR) << "failed to reset signal handler";
    // We cannot close the pipe if the signal handler wasn't unregistered,
    // to avoid receiving SIGPIPE.
    return false;
  }
  close(g_signal_pipe_fds[1]);

  art::Thread* self = art::Thread::Current();
  art::MutexLock lk(self, GetStateMutex());
  // Wait until after the thread has been registered with the runtime. This is so
  // we do not attempt to register it with the runtime after it has been torn
  // down (ArtPlugin_Deinitialize gets called in the Runtime dtor).
  while (g_state == State::kWaitForListener) {
    GetStateCV().Wait(art::Thread::Current());
  }
  g_state = State::kUninitialized;
  GetStateCV().Broadcast(self);
  return true;
}

}  // namespace perfetto_hprof

namespace perfetto {

PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(perfetto_hprof::JavaHprofDataSource);

}  // namespace perfetto