/*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/trace_processor/importers/proto/heap_graph_tracker.h"

#include <algorithm>
#include <array>
#include <cinttypes>
#include <cstdint>
#include <cstring>
#include <deque>
#include <map>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include "perfetto/base/logging.h"
#include "perfetto/ext/base/string_view.h"
#include "protos/perfetto/trace/profiling/heap_graph.pbzero.h"
#include "src/trace_processor/storage/stats.h"
#include "src/trace_processor/storage/trace_storage.h"
#include "src/trace_processor/tables/profiler_tables_py.h"
#include "src/trace_processor/util/profiler_util.h"

namespace perfetto::trace_processor {

namespace {

using ClassTable = tables::HeapGraphClassTable;
using ObjectTable = tables::HeapGraphObjectTable;
using ReferenceTable = tables::HeapGraphReferenceTable;

// Iterates over all the references owned by |object|.
//
// Calls bool(*fn)(ReferenceTable::RowReference) with each row of the
// `storage.heap_graph_reference()` table in |object|'s reference set.
// Stops when `fn` returns false or when there are no more rows.
template <typename F>
void ForReferenceSet(TraceStorage* storage,
                     ObjectTable::ConstRowReference object,
                     F fn) {
  std::optional<uint32_t> reference_set_id = object.reference_set_id();
  if (!reference_set_id)
    return;

  auto* ref = storage->mutable_heap_graph_reference_table();
  Query q;
  q.constraints = {ref->reference_set_id().eq(*reference_set_id)};
  auto it = ref->FilterToIterator(q);

  for (; it; ++it) {
    if (!fn(it.row_reference()))
      break;
  }
}

struct ClassDescriptor {
  StringId name;
  std::optional<StringId> location;

  bool operator<(const ClassDescriptor& other) const {
    return std::tie(name, location) < std::tie(other.name, other.location);
  }
};

ClassDescriptor GetClassDescriptor(const TraceStorage& storage,
                                   ObjectTable::Id obj_id) {
  auto obj_row_ref = *storage.heap_graph_object_table().FindById(obj_id);
  auto type_row_ref =
      *storage.heap_graph_class_table().FindById(obj_row_ref.type_id());
  return {type_row_ref.name(), type_row_ref.location()};
}

std::optional<ObjectTable::Id> GetReferredObj(const TraceStorage& storage,
                                              uint32_t ref_set_id,
                                              const std::string& field_name) {
  const auto& refs_tbl = storage.heap_graph_reference_table();
  Query q;
  q.constraints = {refs_tbl.reference_set_id().eq(ref_set_id),
                   refs_tbl.field_name().eq(NullTermStringView(field_name))};
  auto refs_it = refs_tbl.FilterToIterator(q);
  if (!refs_it) {
    return std::nullopt;
  }
  return refs_it.owned_id();
}

// Maps from a normalized class name and location to the superclass.
std::map<ClassDescriptor, ClassDescriptor>
BuildSuperclassMap(UniquePid upid, int64_t ts, TraceStorage* storage) {
  std::map<ClassDescriptor, ClassDescriptor> superclass_map;

  // Resolve superclasses by iterating heap graph objects and identifying the
  // superClass field.
  const auto& objects_tbl = storage->heap_graph_object_table();
  Query q;
  q.constraints = {objects_tbl.upid().eq(upid),
                   objects_tbl.graph_sample_ts().eq(ts)};
  auto obj_it = objects_tbl.FilterToIterator(q);
  for (; obj_it; ++obj_it) {
    auto obj_id = obj_it.id();
    auto class_descriptor = GetClassDescriptor(*storage, obj_id);
    auto normalized =
        GetNormalizedType(storage->GetString(class_descriptor.name));
    // superClass pointers are stored on the static class objects. Ignore
    // arrays, as they are generated objects.
    if (!normalized.is_static_class || normalized.number_of_arrays > 0)
      continue;

    auto opt_ref_set_id = obj_it.reference_set_id();
    if (!opt_ref_set_id)
      continue;
    auto super_obj_id =
        GetReferredObj(*storage, *opt_ref_set_id, "java.lang.Class.superClass");
    if (!super_obj_id) {
      // This is expected to be missing for Object and primitive types.
      continue;
    }

    // Look up the type of the superclass object.
    auto super_class_descriptor = GetClassDescriptor(*storage, *super_obj_id);
    auto super_class_name =
        NormalizeTypeName(storage->GetString(super_class_descriptor.name));
    StringId super_class_id = storage->InternString(super_class_name);
    StringId class_id = storage->InternString(normalized.name);
    superclass_map[{class_id, class_descriptor.location}] = {
        super_class_id, super_class_descriptor.location};
  }
  return superclass_map;
}

// Extract the size from `nar_size`, which is the value of a
// libcore.util.NativeAllocationRegistry.size field: it encodes the size, but
// uses the least significant bit to represent the source of the allocation.
int64_t GetSizeFromNativeAllocationRegistry(int64_t nar_size) {
  constexpr uint64_t kIsMalloced = 1;
  return static_cast<int64_t>(static_cast<uint64_t>(nar_size) & ~kIsMalloced);
}

// A given object can be a heap root in different ways. Ensure analysis is
// consistent.
constexpr std::array<protos::pbzero::HeapGraphRoot::Type, 3>
    kRootTypePrecedence = {
        protos::pbzero::HeapGraphRoot::ROOT_STICKY_CLASS,
        protos::pbzero::HeapGraphRoot::ROOT_JNI_GLOBAL,
        protos::pbzero::HeapGraphRoot::ROOT_JNI_LOCAL,
};
}  // namespace

std::optional<base::StringView> GetStaticClassTypeName(base::StringView type) {
  static const base::StringView kJavaClassTemplate("java.lang.Class<");
  if (!type.empty() && type.at(type.size() - 1) == '>' &&
      type.substr(0, kJavaClassTemplate.size()) == kJavaClassTemplate) {
    return type.substr(kJavaClassTemplate.size(),
                       type.size() - kJavaClassTemplate.size() - 1);
  }
  return {};
}

size_t NumberOfArrays(base::StringView type) {
  if (type.size() < 2)
    return 0;

  size_t arrays = 0;
  while (type.size() >= 2 * (arrays + 1) &&
         memcmp(type.end() - 2 * (arrays + 1), "[]", 2) == 0) {
    arrays++;
  }
  return arrays;
}

NormalizedType GetNormalizedType(base::StringView type) {
  auto static_class_type_name = GetStaticClassTypeName(type);
  if (static_class_type_name.has_value()) {
    type = static_class_type_name.value();
  }
  size_t number_of_arrays = NumberOfArrays(type);
  return {base::StringView(type.data(), type.size() - number_of_arrays * 2),
          static_class_type_name.has_value(), number_of_arrays};
}

base::StringView NormalizeTypeName(base::StringView type) {
  return GetNormalizedType(type).name;
}

std::string DenormalizeTypeName(NormalizedType normalized,
                                base::StringView deobfuscated_type_name) {
  std::string result = deobfuscated_type_name.ToStdString();
  for (size_t i = 0; i < normalized.number_of_arrays; ++i) {
    result += "[]";
  }
  if (normalized.is_static_class) {
    result = "java.lang.Class<" + result + ">";
  }
  return result;
}

HeapGraphTracker::HeapGraphTracker(TraceStorage* storage)
    : storage_(storage),
      cleaner_thunk_str_id_(storage_->InternString("sun.misc.Cleaner.thunk")),
      referent_str_id_(
          storage_->InternString("java.lang.ref.Reference.referent")),
      cleaner_thunk_this0_str_id_(storage_->InternString(
          "libcore.util.NativeAllocationRegistry$CleanerThunk.this$0")),
      native_size_str_id_(
          storage_->InternString("libcore.util.NativeAllocationRegistry.size")),
      cleaner_next_str_id_(storage_->InternString("sun.misc.Cleaner.next")) {
  for (size_t i = 0; i < root_type_string_ids_.size(); i++) {
    auto val = static_cast<protos::pbzero::HeapGraphRoot::Type>(i);
    auto str_view =
        base::StringView(protos::pbzero::HeapGraphRoot_Type_Name(val));
    root_type_string_ids_[i] = storage_->InternString(str_view);
  }

  for (size_t i = 0; i < type_kind_string_ids_.size(); i++) {
    auto val = static_cast<protos::pbzero::HeapGraphType::Kind>(i);
    auto str_view =
        base::StringView(protos::pbzero::HeapGraphType_Kind_Name(val));
    type_kind_string_ids_[i] = storage_->InternString(str_view);
  }
}

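// Returns the parsing state for packet sequence |seq_id|, creating an empty
// state on first use.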
HeapGraphTracker::SequenceState& HeapGraphTracker::GetOrCreateSequence(
    uint32_t seq_id) {
  return sequence_state_[seq_id];
}

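// Records which process and timestamp this sequence's graph belongs to.
// Returns false (and bumps the non_finalized_graph stat) if the sequence was
// already bound to a different upid or timestamp.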
bool HeapGraphTracker::SetPidAndTimestamp(SequenceState* sequence_state,
                                          UniquePid upid,
                                          int64_t ts) {
  if (sequence_state->current_upid != 0 &&
      sequence_state->current_upid != upid) {
    storage_->IncrementStats(stats::heap_graph_non_finalized_graph);
    return false;
  }
  if (sequence_state->current_ts != 0 && sequence_state->current_ts != ts) {
    storage_->IncrementStats(stats::heap_graph_non_finalized_graph);
    return false;
  }
  sequence_state->current_upid = upid;
  sequence_state->current_ts = ts;
  return true;
}

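// Returns the heap_graph_object row for |object_id|, inserting a placeholder
// row (type, size and reachability get filled in later) the first time the
// object is seen on this sequence.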
ObjectTable::RowReference HeapGraphTracker::GetOrInsertObject(
    SequenceState* sequence_state,
    uint64_t object_id) {
  auto* object_table = storage_->mutable_heap_graph_object_table();
  auto* ptr = sequence_state->object_id_to_db_row.Find(object_id);
  if (!ptr) {
    auto id_and_row = object_table->Insert({sequence_state->current_upid,
                                            sequence_state->current_ts,
                                            -1,
                                            0,
                                            /*reference_set_id=*/std::nullopt,
                                            /*reachable=*/0,
                                            {},
                                            /*root_type=*/std::nullopt,
                                            /*root_distance*/ -1});
    bool inserted;
    std::tie(ptr, inserted) = sequence_state->object_id_to_db_row.Insert(
        object_id, id_and_row.row_number);
  }
  return ptr->ToRowReference(object_table);
}

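// Returns the heap_graph_class row for the interned |type_id|, inserting an
// empty row (name and location are filled in by FinalizeProfile) if needed.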
ClassTable::RowReference HeapGraphTracker::GetOrInsertType(
    SequenceState* sequence_state,
    uint64_t type_id) {
  auto* class_table = storage_->mutable_heap_graph_class_table();
  auto* ptr = sequence_state->type_id_to_db_row.Find(type_id);
  if (!ptr) {
    auto id_and_row =
        class_table->Insert({StringId(), std::nullopt, std::nullopt});
    bool inserted;
    std::tie(ptr, inserted) = sequence_state->type_id_to_db_row.Insert(
        type_id, id_and_row.row_number);
  }
  return ptr->ToRowReference(class_table);
}

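// Ingests a single object from the heap graph: records its size and type and
// inserts one heap_graph_reference row per outgoing reference. Field names
// and sizes that are only interned later in the dump are backfilled in
// AddInternedFieldName / FinalizeProfile.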
void HeapGraphTracker::AddObject(uint32_t seq_id,
                                 UniquePid upid,
                                 int64_t ts,
                                 SourceObject obj) {
  SequenceState& sequence_state = GetOrCreateSequence(seq_id);

  if (!SetPidAndTimestamp(&sequence_state, upid, ts))
    return;

  sequence_state.last_object_id = obj.object_id;

  ObjectTable::RowReference owner_row_ref =
      GetOrInsertObject(&sequence_state, obj.object_id);
  ClassTable::RowReference type_row_ref =
      GetOrInsertType(&sequence_state, obj.type_id);

  ClassTable::Id type_id = type_row_ref.id();

  owner_row_ref.set_self_size(static_cast<int64_t>(obj.self_size));
  owner_row_ref.set_type_id(type_id);

  if (obj.self_size == 0) {
    sequence_state.deferred_size_objects_for_type_[type_id].push_back(
        owner_row_ref.ToRowNumber());
  }

  uint32_t reference_set_id =
      storage_->heap_graph_reference_table().row_count();
  bool any_references = false;

  ObjectTable::Id owner_id = owner_row_ref.id();
  for (size_t i = 0; i < obj.referred_objects.size(); ++i) {
    uint64_t owned_object_id = obj.referred_objects[i];
    // A zero object id means the reference field is unset.
    std::optional<ObjectTable::RowReference> owned_row_ref;
    if (owned_object_id != 0)
      owned_row_ref = GetOrInsertObject(&sequence_state, owned_object_id);

    auto ref_id_and_row =
        storage_->mutable_heap_graph_reference_table()->Insert(
            {reference_set_id,
             owner_id,
             owned_row_ref ? std::make_optional(owned_row_ref->id())
                           : std::nullopt,
             {},
             {},
             /*deobfuscated_field_name=*/std::nullopt});
    if (!obj.field_name_ids.empty()) {
      sequence_state.references_for_field_name_id[obj.field_name_ids[i]]
          .push_back(ref_id_and_row.row_number);
    }
    any_references = true;
  }
  if (any_references) {
    owner_row_ref.set_reference_set_id(reference_set_id);
    if (obj.field_name_ids.empty()) {
      sequence_state.deferred_reference_objects_for_type_[type_id].push_back(
          owner_row_ref.ToRowNumber());
    }
  }

  if (obj.native_allocation_registry_size.has_value()) {
    sequence_state.nar_size_by_obj_id[owner_id] =
        *obj.native_allocation_registry_size;
  }
}

void HeapGraphTracker::AddRoot(uint32_t seq_id,
                               UniquePid upid,
                               int64_t ts,
                               SourceRoot root) {
  SequenceState& sequence_state = GetOrCreateSequence(seq_id);
  if (!SetPidAndTimestamp(&sequence_state, upid, ts))
    return;

  sequence_state.current_roots.emplace_back(std::move(root));
}

void HeapGraphTracker::AddInternedLocationName(uint32_t seq_id,
                                               uint64_t intern_id,
                                               StringId strid) {
  SequenceState& sequence_state = GetOrCreateSequence(seq_id);
  sequence_state.interned_location_names.emplace(intern_id, strid);
}

void HeapGraphTracker::AddInternedType(
    uint32_t seq_id,
    uint64_t intern_id,
    StringId strid,
    std::optional<uint64_t> location_id,
    uint64_t object_size,
    std::vector<uint64_t> field_name_ids,
    uint64_t superclass_id,
    uint64_t classloader_id,
    bool no_fields,
    protos::pbzero::HeapGraphType::Kind kind) {
  SequenceState& sequence_state = GetOrCreateSequence(seq_id);
  InternedType& type = sequence_state.interned_types[intern_id];
  type.name = strid;
  type.location_id = location_id;
  type.object_size = object_size;
  type.field_name_ids = std::move(field_name_ids);
  type.superclass_id = superclass_id;
  type.classloader_id = classloader_id;
  type.no_fields = no_fields;
  type.kind = kind;
}

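// Interns a field string of the form "<field type> <field name>" (the type
// prefix is optional) and backfills field names on any reference rows that
// already used this intern id.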
void HeapGraphTracker::AddInternedFieldName(uint32_t seq_id,
                                            uint64_t intern_id,
                                            base::StringView str) {
  SequenceState& sequence_state = GetOrCreateSequence(seq_id);
  size_t space = str.find(' ');
  base::StringView type;
  if (space != base::StringView::npos) {
    type = str.substr(0, space);
    str = str.substr(space + 1);
  }
  StringId field_name = storage_->InternString(str);
  StringId type_name = storage_->InternString(type);

  sequence_state.interned_fields.Insert(intern_id,
                                        InternedField{field_name, type_name});

  auto it = sequence_state.references_for_field_name_id.find(intern_id);
  if (it != sequence_state.references_for_field_name_id.end()) {
    auto* hgr = storage_->mutable_heap_graph_reference_table();
    for (ReferenceTable::RowNumber reference_row_num : it->second) {
      auto row_ref = reference_row_num.ToRowReference(hgr);
      row_ref.set_field_name(field_name);
      row_ref.set_field_type_name(type_name);
      field_to_rows_[field_name].emplace_back(reference_row_num);
    }
  }
}

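// Tracks the per-sequence packet index so that dropped packets can be
// detected; any gap marks the graph as truncated.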
void HeapGraphTracker::SetPacketIndex(uint32_t seq_id, uint64_t index) {
  SequenceState& sequence_state = GetOrCreateSequence(seq_id);
  bool dropped_packet = false;
  // perfetto_hprof starts counting at index = 0.
  if (!sequence_state.prev_index && index != 0) {
    dropped_packet = true;
  }

  if (sequence_state.prev_index && *sequence_state.prev_index + 1 != index) {
    dropped_packet = true;
  }

  if (dropped_packet) {
    sequence_state.truncated = true;
    if (sequence_state.prev_index) {
      PERFETTO_ELOG("Missing packets between %" PRIu64 " and %" PRIu64,
                    *sequence_state.prev_index, index);
    } else {
      PERFETTO_ELOG("Invalid first packet index %" PRIu64 " (!= 0)", index);
    }

    storage_->IncrementIndexedStats(
        stats::heap_graph_missing_packet,
        static_cast<int>(sequence_state.current_upid));
  }
  sequence_state.prev_index = index;
}

// This only works on Android S+ traces. We need to have ingested the whole
// profile before calling this function (e.g. in FinalizeProfile).
HeapGraphTracker::InternedType* HeapGraphTracker::GetSuperClass(
    SequenceState* sequence_state,
    const InternedType* current_type) {
  if (current_type->superclass_id) {
    auto it = sequence_state->interned_types.find(current_type->superclass_id);
    if (it != sequence_state->interned_types.end())
      return &it->second;
  }
  storage_->IncrementIndexedStats(
      stats::heap_graph_malformed_packet,
      static_cast<int>(sequence_state->current_upid));
  return nullptr;
}

void HeapGraphTracker::FinalizeProfile(uint32_t seq_id) {
  SequenceState& sequence_state = GetOrCreateSequence(seq_id);
  if (sequence_state.truncated) {
    truncated_graphs_.emplace(
        std::make_pair(sequence_state.current_upid, sequence_state.current_ts));
  }

  // We do this in FinalizeProfile because the interned_location_names get
  // written at the end of the dump.
  for (const auto& p : sequence_state.interned_types) {
    uint64_t id = p.first;
    const InternedType& interned_type = p.second;
    std::optional<StringId> location_name;
    if (interned_type.location_id) {
      auto it = sequence_state.interned_location_names.find(
          *interned_type.location_id);
      if (it == sequence_state.interned_location_names.end()) {
        storage_->IncrementIndexedStats(
            stats::heap_graph_invalid_string_id,
            static_cast<int>(sequence_state.current_upid));
      } else {
        location_name = it->second;
      }
    }
    ClassTable::RowReference type_row_ref =
        GetOrInsertType(&sequence_state, id);
    ClassTable::Id type_id = type_row_ref.id();

    auto sz_obj_it =
        sequence_state.deferred_size_objects_for_type_.find(type_id);
    if (sz_obj_it != sequence_state.deferred_size_objects_for_type_.end()) {
      auto* hgo = storage_->mutable_heap_graph_object_table();
      for (ObjectTable::RowNumber obj_row_num : sz_obj_it->second) {
        auto obj_row_ref = obj_row_num.ToRowReference(hgo);
        obj_row_ref.set_self_size(
            static_cast<int64_t>(interned_type.object_size));
      }
      sequence_state.deferred_size_objects_for_type_.erase(sz_obj_it);
    }

    auto ref_obj_it =
        sequence_state.deferred_reference_objects_for_type_.find(type_id);
    if (ref_obj_it !=
        sequence_state.deferred_reference_objects_for_type_.end()) {
      for (ObjectTable::RowNumber obj_row_number : ref_obj_it->second) {
        auto obj_row_ref = obj_row_number.ToRowReference(
            storage_->mutable_heap_graph_object_table());
        const InternedType* current_type = &interned_type;
        if (interned_type.no_fields) {
          continue;
        }
        size_t field_offset_in_cls = 0;
        ForReferenceSet(
            storage_, obj_row_ref,
            [this, &current_type, &sequence_state,
             &field_offset_in_cls](ReferenceTable::RowReference ref) {
              while (current_type && field_offset_in_cls >=
                                         current_type->field_name_ids.size()) {
                size_t prev_type_size = current_type->field_name_ids.size();
                current_type = GetSuperClass(&sequence_state, current_type);
                field_offset_in_cls -= prev_type_size;
              }

              if (!current_type) {
                return false;
              }

              uint64_t field_id =
                  current_type->field_name_ids[field_offset_in_cls++];
              auto* ptr = sequence_state.interned_fields.Find(field_id);
              if (!ptr) {
                PERFETTO_DLOG("Invalid field id.");
                storage_->IncrementIndexedStats(
                    stats::heap_graph_malformed_packet,
                    static_cast<int>(sequence_state.current_upid));
                return true;
              }
              const InternedField& field = *ptr;
              ref.set_field_name(field.name);
              ref.set_field_type_name(field.type_name);
              field_to_rows_[field.name].emplace_back(ref.ToRowNumber());
              return true;
            });
      }
      sequence_state.deferred_reference_objects_for_type_.erase(ref_obj_it);
    }

    type_row_ref.set_name(interned_type.name);
    if (interned_type.classloader_id) {
      auto classloader_object_ref =
          GetOrInsertObject(&sequence_state, interned_type.classloader_id);
      type_row_ref.set_classloader_id(classloader_object_ref.id().value);
    }
    if (location_name)
      type_row_ref.set_location(*location_name);
    type_row_ref.set_kind(InternTypeKindString(interned_type.kind));

    base::StringView normalized_type =
        NormalizeTypeName(storage_->GetString(interned_type.name));

    std::optional<StringId> class_package;
    if (location_name) {
      std::optional<std::string> package_name =
          PackageFromLocation(storage_, storage_->GetString(*location_name));
      if (package_name) {
        class_package = storage_->InternString(base::StringView(*package_name));
      }
    }
    if (!class_package) {
      auto app_id = storage_->process_table()[sequence_state.current_upid]
                        .android_appid();
      if (app_id) {
        for (auto it = storage_->package_list_table().IterateRows(); it; ++it) {
          if (it.uid() == *app_id) {
            class_package = it.package_name();
            break;
          }
        }
      }
    }

    class_to_rows_[std::make_pair(class_package,
                                  storage_->InternString(normalized_type))]
        .emplace_back(type_row_ref.ToRowNumber());
  }

  if (!sequence_state.deferred_size_objects_for_type_.empty() ||
      !sequence_state.deferred_reference_objects_for_type_.empty()) {
    storage_->IncrementIndexedStats(
        stats::heap_graph_malformed_packet,
        static_cast<int>(sequence_state.current_upid));
  }

  for (const SourceRoot& root : sequence_state.current_roots) {
    for (uint64_t obj_id : root.object_ids) {
      auto ptr = sequence_state.object_id_to_db_row.Find(obj_id);
      // This can only happen for an invalid type string id, which is already
      // reported as an error. Silently continue here.
      if (!ptr)
        continue;

      ObjectTable::RowReference row_ref =
          ptr->ToRowReference(storage_->mutable_heap_graph_object_table());
      roots_[std::make_pair(sequence_state.current_upid,
                            sequence_state.current_ts)]
          .emplace(*ptr);
      MarkRoot(row_ref, InternRootTypeString(root.root_type));
    }
  }

  PopulateSuperClasses(sequence_state);
  PopulateNativeSize(sequence_state);
  sequence_state_.erase(seq_id);
}

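// Returns the id of the object that |obj| references through the field named
// |field|, if such a reference exists.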
std::optional<ObjectTable::Id> HeapGraphTracker::GetReferenceByFieldName(
    ObjectTable::Id obj,
    StringId field) {
  std::optional<ObjectTable::Id> referred;
  auto obj_row_ref = *storage_->heap_graph_object_table().FindById(obj);
  ForReferenceSet(storage_, obj_row_ref,
                  [&](ReferenceTable::RowReference ref) -> bool {
                    if (ref.field_name() == field) {
                      referred = ref.owned_id();
                      return false;
                    }
                    return true;
                  });
  return referred;
}

void HeapGraphTracker::PopulateNativeSize(const SequenceState& seq) {
  //             +-------------------------------+  .referent   +--------+
  //             |       sun.misc.Cleaner        | -----------> | Object |
  //             +-------------------------------+              +--------+
  //                |
  //                | .thunk
  //                v
  // +----------------------------------------------------+
  // | libcore.util.NativeAllocationRegistry$CleanerThunk |
  // +----------------------------------------------------+
  //   |
  //   | .this$0
  //   v
  // +----------------------------------------------------+
  // |       libcore.util.NativeAllocationRegistry        |
  // |                       .size                        |
  // +----------------------------------------------------+
  //
  // `.size` should be attributed as the native size of Object.

  const auto& class_tbl = storage_->heap_graph_class_table();
  auto& objects_tbl = *storage_->mutable_heap_graph_object_table();

  struct Cleaner {
    ObjectTable::Id referent;
    ObjectTable::Id thunk;
  };
  std::vector<Cleaner> cleaners;

  Query q;
  q.constraints = {class_tbl.name().eq("sun.misc.Cleaner")};
  auto class_it = class_tbl.FilterToIterator(q);
  for (; class_it; ++class_it) {
    auto class_id = class_it.id();
    Query query;
    query.constraints = {objects_tbl.type_id().eq(class_id.value),
                         objects_tbl.upid().eq(seq.current_upid),
                         objects_tbl.graph_sample_ts().eq(seq.current_ts)};
    auto obj_it = objects_tbl.FilterToIterator(query);
    for (; obj_it; ++obj_it) {
      ObjectTable::Id cleaner_obj_id = obj_it.id();
      std::optional<ObjectTable::Id> referent_id =
          GetReferenceByFieldName(cleaner_obj_id, referent_str_id_);
      std::optional<ObjectTable::Id> thunk_id =
          GetReferenceByFieldName(cleaner_obj_id, cleaner_thunk_str_id_);

      if (!referent_id || !thunk_id) {
        continue;
      }

      std::optional<ObjectTable::Id> next_id =
          GetReferenceByFieldName(cleaner_obj_id, cleaner_next_str_id_);
      if (next_id.has_value() && *next_id == cleaner_obj_id) {
        // sun.misc.Cleaner.next pointing back to the same sun.misc.Cleaner
        // means that sun.misc.Cleaner.clean() has already been called. Skip
        // this object.
        continue;
      }
      cleaners.push_back(Cleaner{*referent_id, *thunk_id});
    }
  }

  for (const auto& cleaner : cleaners) {
    std::optional<ObjectTable::Id> this0 =
        GetReferenceByFieldName(cleaner.thunk, cleaner_thunk_this0_str_id_);
    if (!this0) {
      continue;
    }

    auto nar_size_it = seq.nar_size_by_obj_id.find(*this0);
    if (nar_size_it == seq.nar_size_by_obj_id.end()) {
      continue;
    }

    int64_t native_size =
        GetSizeFromNativeAllocationRegistry(nar_size_it->second);
    auto referent_row_ref = *objects_tbl.FindById(cleaner.referent);
    int64_t total_native_size = referent_row_ref.native_size() + native_size;
    referent_row_ref.set_native_size(total_native_size);
  }
}

// TODO(fmayer): For Android S+ traces, use the superclass_id from the trace.
void HeapGraphTracker::PopulateSuperClasses(const SequenceState& seq) {
  // Maps from a normalized class name and location to the superclass.
  std::map<ClassDescriptor, ClassDescriptor> superclass_map =
      BuildSuperclassMap(seq.current_upid, seq.current_ts, storage_);

  auto* classes_tbl = storage_->mutable_heap_graph_class_table();
  std::map<ClassDescriptor, ClassTable::Id> class_to_id;
  for (auto it = classes_tbl->IterateRows(); it; ++it) {
    class_to_id[{it.name(), it.location()}] = it.id();
  }

  // Iterate through the classes table and annotate rows with their
  // superclasses. We visit every row (even though the superclass mapping was
  // generated only for the current sequence); rows for which we cannot
  // identify a superclass are simply skipped.
  for (uint32_t i = 0; i < classes_tbl->row_count(); ++i) {
    auto rr = (*classes_tbl)[i];
    auto name = storage_->GetString(rr.name());
    auto location = rr.location();
    auto normalized = GetNormalizedType(name);
    if (normalized.is_static_class || normalized.number_of_arrays > 0)
      continue;

    StringId class_name_id = storage_->InternString(normalized.name);
    auto map_it = superclass_map.find({class_name_id, location});
    if (map_it == superclass_map.end()) {
      continue;
    }

    // Find the row for the superclass id.
    auto superclass_it = class_to_id.find(map_it->second);
    if (superclass_it == class_to_id.end()) {
      // This can happen for traces that were captured before the patch to
      // explicitly emit interned types (meaning classes without live
      // instances would not appear here).
      continue;
    }
    rr.set_superclass_id(superclass_it->second);
  }
}

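// Collects the deduplicated ids of all objects directly referenced by
// |object|. The referent field of soft/weak/finalizer/phantom references is
// skipped so that referents are not treated as strongly retained.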
void HeapGraphTracker::GetChildren(ObjectTable::RowReference object,
                                   std::vector<ObjectTable::Id>& children) {
  children.clear();

  auto cls_row_ref =
      *storage_->heap_graph_class_table().FindById(object.type_id());

  StringId kind = cls_row_ref.kind();

  bool is_ignored_reference =
      kind == InternTypeKindString(
                  protos::pbzero::HeapGraphType::KIND_WEAK_REFERENCE) ||
      kind == InternTypeKindString(
                  protos::pbzero::HeapGraphType::KIND_SOFT_REFERENCE) ||
      kind == InternTypeKindString(
                  protos::pbzero::HeapGraphType::KIND_FINALIZER_REFERENCE) ||
      kind == InternTypeKindString(
                  protos::pbzero::HeapGraphType::KIND_PHANTOM_REFERENCE);

  ForReferenceSet(
      storage_, object,
      [object, &children, is_ignored_reference,
       this](ReferenceTable::RowReference ref) {
        PERFETTO_CHECK(ref.owner_id() == object.id());
        auto opt_owned = ref.owned_id();
        if (!opt_owned) {
          return true;
        }
        if (is_ignored_reference && ref.field_name() == referent_str_id_) {
          // If `object` is a special reference kind, its
          // "java.lang.ref.Reference.referent" field should be ignored.
          return true;
        }
        children.push_back(*opt_owned);
        return true;
      });
  std::sort(children.begin(), children.end(),
            [](const ObjectTable::Id& a, const ObjectTable::Id& b) {
              return a.value < b.value;
            });
  children.erase(std::unique(children.begin(), children.end()), children.end());
}

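// Returns the index of |type| in kRootTypePrecedence; a lower index means a
// higher-precedence root type. Types not in the list rank last.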
size_t HeapGraphTracker::RankRoot(StringId type) {
  size_t idx = 0;
  for (; idx < kRootTypePrecedence.size(); ++idx) {
    if (type == InternRootTypeString(kRootTypePrecedence[idx])) {
      break;
    }
  }
  return idx;
}

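// Records |row_ref| as a GC root of the given type (keeping the
// highest-precedence type if it is already a root) and marks everything
// transitively reachable from it.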
void HeapGraphTracker::MarkRoot(ObjectTable::RowReference row_ref,
                                StringId type) {
  // Already marked as a root: keep the root type with the highest precedence.
  if (row_ref.root_type()) {
    if (RankRoot(type) < RankRoot(*row_ref.root_type())) {
      row_ref.set_root_type(type);
    }
    return;
  }
  row_ref.set_root_type(type);

  std::vector<ObjectTable::Id> children;

  // DFS to mark reachability for all children.
  std::vector<ObjectTable::RowReference> stack({row_ref});
  while (!stack.empty()) {
    ObjectTable::RowReference cur_node = stack.back();
    stack.pop_back();

    if (cur_node.reachable())
      continue;
    cur_node.set_reachable(true);

    GetChildren(cur_node, children);
    for (ObjectTable::Id child_node : children) {
      auto child_ref =
          *storage_->mutable_heap_graph_object_table()->FindById(child_node);
      stack.push_back(child_ref);
    }
  }
}

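// Breadth-first search from the GC root |row_ref| that updates root_distance
// on every reachable object to the length of the shortest known path from a
// root.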
void HeapGraphTracker::UpdateShortestPaths(ObjectTable::RowReference row_ref) {
  // Calculate shortest distance to a GC root.
  std::deque<std::pair<int32_t, ObjectTable::RowReference>> reachable_nodes{
      {0, row_ref}};

  std::vector<ObjectTable::Id> children;
  while (!reachable_nodes.empty()) {
    auto pair = reachable_nodes.front();

    int32_t distance = pair.first;
    ObjectTable::RowReference cur_row_ref = pair.second;

    reachable_nodes.pop_front();
    int32_t cur_distance = cur_row_ref.root_distance();
    if (cur_distance == -1 || cur_distance > distance) {
      cur_row_ref.set_root_distance(distance);

      GetChildren(cur_row_ref, children);
      for (ObjectTable::Id child_node : children) {
        auto child_row_ref =
            *storage_->mutable_heap_graph_object_table()->FindById(child_node);
        int32_t child_distance = child_row_ref.root_distance();
        if (child_distance == -1 || child_distance > distance + 1)
          reachable_nodes.emplace_back(distance + 1, child_row_ref);
      }
    }
  }
}

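// Walks the objects reachable from |row_ref| along shortest-path edges (see
// UpdateShortestPaths) and aggregates their sizes and counts into |path|,
// grouping objects by class at each depth.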
void HeapGraphTracker::FindPathFromRoot(ObjectTable::RowReference row_ref,
                                        PathFromRoot* path) {
  // We have long retention chains (e.g. from LinkedList). If we use the stack
  // here, we risk running out of stack space. This is why we use a vector to
  // simulate the stack.
  struct StackElem {
    ObjectTable::RowReference node;  // Node in the original graph.
    size_t parent_id;                // id of parent node in the result tree.
    size_t i;        // Index of the next child of this node to handle.
    uint32_t depth;  // Depth in the resulting tree
                     // (including artificial root).
    std::vector<ObjectTable::Id> children;
  };

  std::vector<StackElem> stack{{row_ref, PathFromRoot::kRoot, 0, 0, {}}};
  while (!stack.empty()) {
    ObjectTable::RowReference object_row_ref = stack.back().node;

    size_t parent_id = stack.back().parent_id;
    uint32_t depth = stack.back().depth;
    size_t& i = stack.back().i;
    std::vector<ObjectTable::Id>& children = stack.back().children;

    ClassTable::Id type_id = object_row_ref.type_id();

    auto type_row_ref = *storage_->heap_graph_class_table().FindById(type_id);
    std::optional<StringId> opt_class_name_id =
        type_row_ref.deobfuscated_name();
    if (!opt_class_name_id) {
      opt_class_name_id = type_row_ref.name();
    }
    PERFETTO_CHECK(opt_class_name_id);
    StringId class_name_id = *opt_class_name_id;
    std::optional<StringId> root_type = object_row_ref.root_type();
    if (root_type) {
      class_name_id = storage_->InternString(base::StringView(
          storage_->GetString(class_name_id).ToStdString() + " [" +
          storage_->GetString(*root_type).ToStdString() + "]"));
    }
    auto it = path->nodes[parent_id].children.find(class_name_id);
    if (it == path->nodes[parent_id].children.end()) {
      size_t path_id = path->nodes.size();
      path->nodes.emplace_back(PathFromRoot::Node{});
      std::tie(it, std::ignore) =
          path->nodes[parent_id].children.emplace(class_name_id, path_id);
      path->nodes.back().class_name_id = class_name_id;
      path->nodes.back().depth = depth;
      path->nodes.back().parent_id = parent_id;
    }
    size_t path_id = it->second;
    PathFromRoot::Node* output_tree_node = &path->nodes[path_id];

    if (i == 0) {
      // This is the first time we are looking at this node, so add its
      // size to the relevant node in the resulting tree.
      output_tree_node->size += object_row_ref.self_size();
      output_tree_node->count++;
      GetChildren(object_row_ref, children);

      if (object_row_ref.native_size()) {
        StringId native_class_name_id = storage_->InternString(
            base::StringView(std::string("[native] ") +
                             storage_->GetString(class_name_id).ToStdString()));
        std::map<StringId, size_t>::iterator native_it;
        bool inserted_new_node;
        std::tie(native_it, inserted_new_node) =
            path->nodes[path_id].children.insert({native_class_name_id, 0});
        if (inserted_new_node) {
          native_it->second = path->nodes.size();
          path->nodes.emplace_back(PathFromRoot::Node{});

          path->nodes.back().class_name_id = native_class_name_id;
          path->nodes.back().depth = depth + 1;
          path->nodes.back().parent_id = path_id;
        }
        PathFromRoot::Node* new_output_tree_node =
            &path->nodes[native_it->second];

        new_output_tree_node->size += object_row_ref.native_size();
        new_output_tree_node->count++;
      }
    }

    // We have already handled this node and just need to get its i-th child.
    if (!children.empty()) {
      PERFETTO_CHECK(i < children.size());
      ObjectTable::Id child = children[i];
      auto child_row_ref =
          *storage_->mutable_heap_graph_object_table()->FindById(child);
      if (++i == children.size())
        stack.pop_back();

      int32_t child_distance = child_row_ref.root_distance();
      int32_t n_distance = object_row_ref.root_distance();
      PERFETTO_CHECK(n_distance >= 0);
      PERFETTO_CHECK(child_distance >= 0);

      bool visited = path->visited.count(child);

      if (child_distance == n_distance + 1 && !visited) {
        path->visited.emplace(child);
        stack.emplace_back(StackElem{child_row_ref, path_id, 0, depth + 1, {}});
      }
    } else {
      stack.pop_back();
    }
  }
}

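// Builds the experimental_flamegraph rows for the graph dumped by
// (|current_upid|, |current_ts|): objects are aggregated by class along the
// shortest path from their GC roots.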
std::unique_ptr<tables::ExperimentalFlamegraphTable>
HeapGraphTracker::BuildFlamegraph(const int64_t current_ts,
                                  const UniquePid current_upid) {
  auto profile_type = storage_->InternString("graph");
  auto java_mapping = storage_->InternString("JAVA");

  std::unique_ptr<tables::ExperimentalFlamegraphTable> tbl(
      new tables::ExperimentalFlamegraphTable(storage_->mutable_string_pool()));

  auto it = roots_.find(std::make_pair(current_upid, current_ts));
  if (it == roots_.end()) {
    // TODO(fmayer): This should not be within the flame graph but some marker
    // in the UI.
    if (IsTruncated(current_upid, current_ts)) {
      tables::ExperimentalFlamegraphTable::Row alloc_row{};
      alloc_row.ts = current_ts;
      alloc_row.upid = current_upid;
      alloc_row.profile_type = profile_type;
      alloc_row.depth = 0;
      alloc_row.name = storage_->InternString(
          "ERROR: INCOMPLETE GRAPH (try increasing buffer size)");
      alloc_row.map_name = java_mapping;
      alloc_row.count = 1;
      alloc_row.cumulative_count = 1;
      alloc_row.size = 1;
      alloc_row.cumulative_size = 1;
      alloc_row.parent_id = std::nullopt;
      tbl->Insert(alloc_row);
      return tbl;
    }
    // We haven't seen this graph, so we should raise an error.
    return nullptr;
  }

  const std::set<ObjectTable::RowNumber>& roots = it->second;
  auto* object_table = storage_->mutable_heap_graph_object_table();

  // First pass to calculate shortest paths.
  for (ObjectTable::RowNumber root : roots) {
    UpdateShortestPaths(root.ToRowReference(object_table));
  }
  PathFromRoot init_path;
  for (ObjectTable::RowNumber root : roots) {
    FindPathFromRoot(root.ToRowReference(object_table), &init_path);
  }

  std::vector<int64_t> node_to_cumulative_size(init_path.nodes.size());
  std::vector<int64_t> node_to_cumulative_count(init_path.nodes.size());
  // i > 0 is to skip the artificial root node.
  for (size_t i = init_path.nodes.size() - 1; i > 0; --i) {
    const PathFromRoot::Node& node = init_path.nodes[i];

    node_to_cumulative_size[i] += node.size;
    node_to_cumulative_count[i] += node.count;
    node_to_cumulative_size[node.parent_id] += node_to_cumulative_size[i];
    node_to_cumulative_count[node.parent_id] += node_to_cumulative_count[i];
  }

  std::vector<FlamegraphId> node_to_id(init_path.nodes.size());
  // i = 1 is to skip the artificial root node.
  for (size_t i = 1; i < init_path.nodes.size(); ++i) {
    const PathFromRoot::Node& node = init_path.nodes[i];
    PERFETTO_CHECK(node.parent_id < i);
    std::optional<FlamegraphId> parent_id;
    if (node.parent_id != 0)
      parent_id = node_to_id[node.parent_id];
    const uint32_t depth = node.depth;

    tables::ExperimentalFlamegraphTable::Row alloc_row{};
    alloc_row.ts = current_ts;
    alloc_row.upid = current_upid;
    alloc_row.profile_type = profile_type;
    alloc_row.depth = depth;
    alloc_row.name = node.class_name_id;
    alloc_row.map_name = java_mapping;
    alloc_row.count = static_cast<int64_t>(node.count);
    alloc_row.cumulative_count =
        static_cast<int64_t>(node_to_cumulative_count[i]);
    alloc_row.size = static_cast<int64_t>(node.size);
    alloc_row.cumulative_size =
        static_cast<int64_t>(node_to_cumulative_size[i]);
    alloc_row.parent_id = parent_id;
    node_to_id[i] = tbl->Insert(alloc_row).id;
  }
  return tbl;
}

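// Called at the end of the trace. Finalizes any sequence that never received
// its final packet so that partial graphs remain queryable.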
void HeapGraphTracker::FinalizeAllProfiles() {
  if (!sequence_state_.empty()) {
    storage_->IncrementStats(stats::heap_graph_non_finalized_graph);
    // There might still be valuable data even though the trace is truncated.
    while (!sequence_state_.empty()) {
      FinalizeProfile(sequence_state_.begin()->first);
    }
  }
}

bool HeapGraphTracker::IsTruncated(UniquePid upid, int64_t ts) {
  // The graph was finalized but was missing packets.
  if (truncated_graphs_.find(std::make_pair(upid, ts)) !=
      truncated_graphs_.end()) {
    return true;
  }

  // Or the graph was never finalized, so it is missing packets at the end.
  for (const auto& p : sequence_state_) {
    const SequenceState& sequence_state = p.second;
    if (sequence_state.current_upid == upid &&
        sequence_state.current_ts == ts) {
      return true;
    }
  }
  return false;
}

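// Maps a HeapGraphRoot::Type enum to its pre-interned string, falling back to
// ROOT_UNKNOWN for out-of-range values.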
StringId HeapGraphTracker::InternRootTypeString(
    protos::pbzero::HeapGraphRoot::Type root_type) {
  size_t idx = static_cast<size_t>(root_type);
  if (idx >= root_type_string_ids_.size()) {
    idx = static_cast<size_t>(protos::pbzero::HeapGraphRoot::ROOT_UNKNOWN);
  }

  return root_type_string_ids_[idx];
}

StringId HeapGraphTracker::InternTypeKindString(
    protos::pbzero::HeapGraphType::Kind kind) {
  size_t idx = static_cast<size_t>(kind);
  if (idx >= type_kind_string_ids_.size()) {
    idx = static_cast<size_t>(protos::pbzero::HeapGraphType::KIND_UNKNOWN);
  }

  return type_kind_string_ids_[idx];
}

HeapGraphTracker::~HeapGraphTracker() = default;

}  // namespace perfetto::trace_processor