xref: /aosp_15_r20/external/stg/dwarf_processor.cc (revision 9e3b08ae94a55201065475453d799e8b1378bea6)
1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- mode: C++ -*-
3 //
4 // Copyright 2022-2023 Google LLC
5 //
6 // Licensed under the Apache License v2.0 with LLVM Exceptions (the
7 // "License"); you may not use this file except in compliance with the
8 // License.  You may obtain a copy of the License at
9 //
10 //     https://llvm.org/LICENSE.txt
11 //
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 // Author: Aleksei Vetrov
19 
20 #include "dwarf_processor.h"
21 
22 #include <dwarf.h>
23 #include <elfutils/libdw.h>
24 
25 #include <algorithm>
26 #include <cstddef>
27 #include <cstdint>
28 #include <memory>
29 #include <optional>
30 #include <sstream>
31 #include <string>
32 #include <string_view>
33 #include <utility>
34 #include <vector>
35 
36 #include "dwarf_wrappers.h"
37 #include "error.h"
38 #include "filter.h"
39 #include "hex.h"
40 #include "graph.h"
41 #include "scope.h"
42 
43 namespace stg {
44 namespace dwarf {
45 
46 namespace {
47 
HasIncompleteTypes(uint64_t language)48 bool HasIncompleteTypes(uint64_t language) {
49   return language != DW_LANG_Rust;
50 }
51 
EntryToString(Entry & entry)52 std::string EntryToString(Entry& entry) {
53   std::ostringstream os;
54   os << "DWARF entry <" << Hex(entry.GetOffset()) << ">";
55   return os.str();
56 }
57 
MaybeGetName(Entry & entry)58 std::optional<std::string> MaybeGetName(Entry& entry) {
59   return entry.MaybeGetString(DW_AT_name);
60 }
61 
GetName(Entry & entry)62 std::string GetName(Entry& entry) {
63   auto result = MaybeGetName(entry);
64   if (!result.has_value()) {
65     Die() << "Name was not found for " << EntryToString(entry);
66   }
67   return std::move(*result);
68 }
69 
GetNameOrEmpty(Entry & entry)70 std::string GetNameOrEmpty(Entry& entry) {
71   auto result = MaybeGetName(entry);
72   if (!result.has_value()) {
73     return {};
74   }
75   return std::move(*result);
76 }
77 
GetLinkageName(int version,Entry & entry)78 std::string GetLinkageName(int version, Entry& entry) {
79   auto linkage_name = entry.MaybeGetString(
80       version < 4 ? DW_AT_MIPS_linkage_name : DW_AT_linkage_name);
81   if (linkage_name.has_value()) {
82     return std::move(*linkage_name);
83   }
84   return GetNameOrEmpty(entry);
85 }
86 
GetBitSize(Entry & entry)87 size_t GetBitSize(Entry& entry) {
88   if (auto byte_size = entry.MaybeGetUnsignedConstant(DW_AT_byte_size)) {
89     return *byte_size * 8;
90   } else if (auto bit_size = entry.MaybeGetUnsignedConstant(DW_AT_bit_size)) {
91     return *bit_size;
92   }
93   Die() << "Bit size was not found for " << EntryToString(entry);
94 }
95 
GetByteSize(Entry & entry)96 size_t GetByteSize(Entry& entry) {
97   if (auto byte_size = entry.MaybeGetUnsignedConstant(DW_AT_byte_size)) {
98     return *byte_size;
99   } else if (auto bit_size = entry.MaybeGetUnsignedConstant(DW_AT_bit_size)) {
100     // Round up bit_size / 8 to get minimal needed storage size in bytes.
101     return (*bit_size + 7) / 8;
102   }
103   Die() << "Byte size was not found for " << EntryToString(entry);
104 }
105 
GetEncoding(Entry & entry)106 Primitive::Encoding GetEncoding(Entry& entry) {
107   const auto dwarf_encoding = entry.MaybeGetUnsignedConstant(DW_AT_encoding);
108   if (!dwarf_encoding) {
109     Die() << "Encoding was not found for " << EntryToString(entry);
110   }
111   switch (*dwarf_encoding) {
112     case DW_ATE_boolean:
113       return Primitive::Encoding::BOOLEAN;
114     case DW_ATE_complex_float:
115       return Primitive::Encoding::COMPLEX_NUMBER;
116     case DW_ATE_float:
117       return Primitive::Encoding::REAL_NUMBER;
118     case DW_ATE_signed:
119       return Primitive::Encoding::SIGNED_INTEGER;
120     case DW_ATE_signed_char:
121       return Primitive::Encoding::SIGNED_CHARACTER;
122     case DW_ATE_unsigned:
123       return Primitive::Encoding::UNSIGNED_INTEGER;
124     case DW_ATE_unsigned_char:
125       return Primitive::Encoding::UNSIGNED_CHARACTER;
126     case DW_ATE_UTF:
127       return Primitive::Encoding::UTF;
128     default:
129       Die() << "Unknown encoding " << Hex(*dwarf_encoding) << " for "
130             << EntryToString(entry);
131   }
132 }
133 
MaybeGetReferredType(Entry & entry)134 std::optional<Entry> MaybeGetReferredType(Entry& entry) {
135   return entry.MaybeGetReference(DW_AT_type);
136 }
137 
GetReferredType(Entry & entry)138 Entry GetReferredType(Entry& entry) {
139   auto result = MaybeGetReferredType(entry);
140   if (!result.has_value()) {
141     Die() << "Type reference was not found in " << EntryToString(entry);
142   }
143   return *result;
144 }
145 
GetNumberOfElements(Entry & entry)146 size_t GetNumberOfElements(Entry& entry) {
147   // DWARF standard says, that array dimensions could be an entry with
148   // either DW_TAG_subrange_type or DW_TAG_enumeration_type. However, this
149   // code supports only the DW_TAG_subrange_type.
150   Check(entry.GetTag() == DW_TAG_subrange_type)
151       << "Array's dimensions should be an entry of DW_TAG_subrange_type";
152   std::optional<size_t> number_of_elements = entry.MaybeGetCount();
153   if (number_of_elements) {
154     return *number_of_elements;
155   }
156   // If a subrange has no DW_AT_count and no DW_AT_upper_bound attribute, its
157   // size is unknown.
158   return 0;
159 }
160 
161 // Calculate number of bits from the "beginning" of the containing entity to
162 // the "beginning" of the data member using DW_AT_bit_offset.
163 //
164 // "Number of bits from the beginning", depends on the definition of the
165 // "beginning", which is different for big- and little-endian architectures.
166 // However, DW_AT_bit_offset is defined from the high order bit of the storage
167 // unit to the high order bit of a field and is the same for both architectures.
168 
169 // So this function converts DW_AT_bit_offset to the "number of bits from the
170 // beginning".
CalculateBitfieldAdjustment(Entry & entry,size_t bit_size,bool is_little_endian_binary)171 size_t CalculateBitfieldAdjustment(Entry& entry, size_t bit_size,
172                              bool is_little_endian_binary) {
173   if (bit_size == 0) {
174     // bit_size == 0 marks that it is not a bit field. No adjustment needed.
175     return 0;
176   }
177   auto container_byte_size = entry.MaybeGetUnsignedConstant(DW_AT_byte_size);
178   auto bit_offset = entry.MaybeGetUnsignedConstant(DW_AT_bit_offset);
179   Check(container_byte_size.has_value() && bit_offset.has_value())
180       << "If member offset is defined as DW_AT_data_member_location, bit field "
181          "should have DW_AT_byte_size and DW_AT_bit_offset";
182   // The following structure will be used as an example in the explanations:
183   // struct foo {
184   //   uint16_t rest_of_the_struct;
185   //   uint16_t x : 5;
186   //   uint16_t y : 6;
187   //   uint16_t z : 5;
188   // };
189   if (is_little_endian_binary) {
190     // Compiler usualy packs bit fields starting with the least significant
191     // bits, but DW_AT_bit_offset is counted from high to low bits:
192     //
193     // rest of the struct|<    container   >
194     //    Container bits: 01234|56789A|BCDEF
195     //  Bit-fields' bits: 01234|012345|01234
196     //        bit_offset: <<<<B<<<<<<5<<<<<0
197     //   bits from start: 0>>>>>5>>>>>>B>>>>
198     //                    <x:5>|< y:6>|<z:5>
199     //
200     //   x.bit_offset: 11 (0xB) bits
201     //   y.bit_offset: 5 bits
202     //   z.bit_offset: 0 bits
203     //
204     // So we need to subtract bit_offset from the container bit size
205     // (container_byte_size * 8) to inverse direction. Also we need to convert
206     // from high- to low-order bit, because the field "begins" with low-order
207     // bit. To do so we need to subtract field's bit size. Resulting formula is:
208     //
209     //   container_byte_size * 8 - bit_offset - bit_size
210     //
211     // If we try it on example, we get correct values:
212     //   x: 2 * 8 - 11 - 5 = 0
213     //   y: 2 * 8 - 5 - 6 = 5
214     //   z: 2 * 8 - 0 - 5 = 11 (0xB)
215     return *container_byte_size * 8 - *bit_offset - bit_size;
216   }
217   // Big-endian orders begins with high-order bit and the bit_offset is from the
218   // high order bit:
219   //
220   // rest of the struct|<    container   >
221   //    Container bits: FEDCB|A98765|43210
222   //  Bit-fields' bits: 43210|543210|43210
223   //        bit_offset: 0>>>>>5>>>>>>B>>>>
224   //   bits from start: 0>>>>>5>>>>>>B>>>>
225   //                    <x:5>|< y:6>|<z:5>
226   //
227   // So we just return bit_offset.
228   return *bit_offset;
229 }
230 
231 // Calculate the number of bits from the beginning of the structure to the
232 // beginning of the data member.
GetDataBitOffset(Entry & entry,size_t bit_size,bool is_little_endian_binary)233 size_t GetDataBitOffset(Entry& entry, size_t bit_size,
234                         bool is_little_endian_binary) {
235   // Offset may be represented either by DW_AT_data_bit_offset (in bits) or by
236   // DW_AT_data_member_location (in bytes).
237   if (auto data_bit_offset =
238           entry.MaybeGetUnsignedConstant(DW_AT_data_bit_offset)) {
239     // DW_AT_data_bit_offset contains what this function needs for any type
240     // of member (bitfield or not) on architecture of any endianness.
241     return *data_bit_offset;
242   } else if (auto byte_offset = entry.MaybeGetMemberByteOffset()) {
243     // DW_AT_data_member_location contains offset in bytes.
244     const size_t bit_offset = *byte_offset * 8;
245     // But there can be offset part, coming from DW_AT_bit_offset. DWARF 5
246     // standard requires to use DW_AT_data_bit_offset in this case, but a lot
247     // of binaries still use combination of DW_AT_data_member_location and
248     // DW_AT_bit_offset.
249     const size_t bitfield_adjusment =
250         CalculateBitfieldAdjustment(entry, bit_size, is_little_endian_binary);
251     return bit_offset + bitfield_adjusment;
252   } else {
253     // If the beginning of the data member is the same as the beginning of the
254     // containing entity then neither attribute is required.
255     return 0;
256   }
257 }
258 
259 }  // namespace
260 
261 // Transforms DWARF entries to STG.
262 class Processor {
263  public:
  // Sets up a processor; the constructor only initialises members and does no
  // processing itself.
  //
  //   graph                    passed to maker_, through which nodes are added
  //   void_id, variadic_id     stored in void_id_ and variadic_id_
  //   is_little_endian_binary  forwarded to GetDataBitOffset when member
  //                            offsets are computed
  //   file_filter              may hold nullptr; other methods compare it
  //                            against nullptr before using it
  //   result                   receives symbols and the processed entry count
  Processor(Graph& graph, Id void_id, Id variadic_id,
            bool is_little_endian_binary,
            const std::unique_ptr<Filter>& file_filter, Types& result)
      : maker_(graph),
        void_id_(void_id),
        variadic_id_(variadic_id),
        is_little_endian_binary_(is_little_endian_binary),
        file_filter_(file_filter),
        result_(result) {}
273 
ProcessCompilationUnit(CompilationUnit & compilation_unit)274   void ProcessCompilationUnit(CompilationUnit& compilation_unit) {
275     version_ = compilation_unit.version;
276     if (file_filter_ != nullptr) {
277       files_ = dwarf::Files(compilation_unit.entry);
278     }
279     Process(compilation_unit.entry);
280   }
281 
ResolveSymbolSpecifications()282   void ResolveSymbolSpecifications() {
283     std::sort(unresolved_symbol_specifications_.begin(),
284               unresolved_symbol_specifications_.end());
285     std::sort(scoped_names_.begin(), scoped_names_.end());
286     auto symbols_it = unresolved_symbol_specifications_.begin();
287     auto names_it = scoped_names_.begin();
288     while (symbols_it != unresolved_symbol_specifications_.end()) {
289       while (names_it != scoped_names_.end() &&
290              names_it->first < symbols_it->first) {
291         ++names_it;
292       }
293       if (names_it == scoped_names_.end() ||
294           names_it->first != symbols_it->first) {
295         Die() << "Scoped name not found for entry " << Hex(symbols_it->first);
296       }
297       result_.symbols[symbols_it->second].scoped_name = names_it->second;
298       ++symbols_it;
299     }
300   }
301 
302  private:
Process(Entry & entry)303   void Process(Entry& entry) {
304     try {
305       return ProcessInternal(entry);
306     } catch (Exception& e) {
307       std::ostringstream os;
308       os << "processing DIE " << Hex(entry.GetOffset());
309       e.Add(os.str());
310       throw;
311     }
312   }
313 
ProcessInternal(Entry & entry)314   void ProcessInternal(Entry& entry) {
315     ++result_.processed_entries;
316     const auto tag = entry.GetTag();
317     switch (tag) {
318       case DW_TAG_array_type:
319         ProcessArray(entry);
320         break;
321       case DW_TAG_enumeration_type:
322         ProcessEnum(entry);
323         break;
324       case DW_TAG_class_type:
325         ProcessStructUnion(entry, StructUnion::Kind::STRUCT);
326         break;
327       case DW_TAG_structure_type:
328         ProcessStructUnion(entry, StructUnion::Kind::STRUCT);
329         break;
330       case DW_TAG_union_type:
331         ProcessStructUnion(entry, StructUnion::Kind::UNION);
332         break;
333       case DW_TAG_member:
334         Die() << "DW_TAG_member outside of struct/class/union";
335         break;
336       case DW_TAG_pointer_type:
337         ProcessReference<PointerReference>(
338             entry, PointerReference::Kind::POINTER);
339         break;
340       case DW_TAG_reference_type:
341         ProcessReference<PointerReference>(
342             entry, PointerReference::Kind::LVALUE_REFERENCE);
343         break;
344       case DW_TAG_rvalue_reference_type:
345         ProcessReference<PointerReference>(
346             entry, PointerReference::Kind::RVALUE_REFERENCE);
347         break;
348       case DW_TAG_ptr_to_member_type:
349         ProcessPointerToMember(entry);
350         break;
351       case DW_TAG_unspecified_type:
352         ProcessUnspecifiedType(entry);
353         break;
354       case DW_TAG_compile_unit:
355         language_ = entry.MustGetUnsignedConstant(DW_AT_language);
356         ProcessAllChildren(entry);
357         break;
358       case DW_TAG_typedef:
359         ProcessTypedef(entry);
360         break;
361       case DW_TAG_base_type:
362         ProcessBaseType(entry);
363         break;
364       case DW_TAG_const_type:
365         ProcessReference<Qualified>(entry, Qualifier::CONST);
366         break;
367       case DW_TAG_volatile_type:
368         ProcessReference<Qualified>(entry, Qualifier::VOLATILE);
369         break;
370       case DW_TAG_restrict_type:
371         ProcessReference<Qualified>(entry, Qualifier::RESTRICT);
372         break;
373       case DW_TAG_atomic_type:
374         // TODO: test pending BTF / test suite support
375         ProcessReference<Qualified>(entry, Qualifier::ATOMIC);
376         break;
377       case DW_TAG_variable:
378         // Process only variables visible externally
379         if (entry.GetFlag(DW_AT_external)) {
380           ProcessVariable(entry);
381         }
382         break;
383       case DW_TAG_subroutine_type:
384         // Standalone function type, for example, used in function pointers.
385         ProcessFunction(entry);
386         break;
387       case DW_TAG_subprogram:
388         // DWARF equivalent of ELF function symbol.
389         ProcessFunction(entry);
390         break;
391       case DW_TAG_namespace:
392         ProcessNamespace(entry);
393         break;
394       case DW_TAG_lexical_block:
395         ProcessAllChildren(entry);
396         break;
397 
398       default:
399         // TODO: die on unexpected tag, when this switch contains
400         // all expected tags
401         break;
402     }
403   }
404 
ProcessAllChildren(Entry & entry)405   void ProcessAllChildren(Entry& entry) {
406     for (auto& child : entry.GetChildren()) {
407       Process(child);
408     }
409   }
410 
CheckNoChildren(Entry & entry)411   void CheckNoChildren(Entry& entry) {
412     if (!entry.GetChildren().empty()) {
413       Die() << "Entry expected to have no children";
414     }
415   }
416 
ProcessNamespace(Entry & entry)417   void ProcessNamespace(Entry& entry) {
418     const auto name = GetNameOrEmpty(entry);
419     const PushScopeName push_scope_name(scope_, "namespace", name);
420     ProcessAllChildren(entry);
421   }
422 
ProcessBaseType(Entry & entry)423   void ProcessBaseType(Entry& entry) {
424     CheckNoChildren(entry);
425     const auto type_name = GetName(entry);
426     const size_t bit_size = GetBitSize(entry);
427     if (bit_size % 8) {
428       Die() << "type '" << type_name << "' size is not a multiple of 8";
429     }
430     const size_t byte_size = bit_size / 8;
431     AddProcessedNode<Primitive>(entry, type_name, GetEncoding(entry),
432                                 byte_size);
433   }
434 
ProcessTypedef(Entry & entry)435   void ProcessTypedef(Entry& entry) {
436     const auto type_name = GetName(entry);
437     const auto full_name = scope_.name + type_name;
438     const Id referred_type_id = GetReferredTypeId(MaybeGetReferredType(entry));
439     const Id id = AddProcessedNode<Typedef>(entry, full_name, referred_type_id);
440     if (!ShouldKeepDefinition(entry, type_name)) {
441       // We always model (and keep) typedef definitions. But we should exclude
442       // filtered out types from being type roots.
443       return;
444     }
445     AddNamedTypeNode(id);
446   }
447 
448   template<typename Node, typename KindType>
ProcessReference(Entry & entry,KindType kind)449   void ProcessReference(Entry& entry, KindType kind) {
450     const Id referred_type_id = GetReferredTypeId(MaybeGetReferredType(entry));
451     AddProcessedNode<Node>(entry, kind, referred_type_id);
452   }
453 
ProcessPointerToMember(Entry & entry)454   void ProcessPointerToMember(Entry& entry) {
455     const Id containing_type_id =
456         GetReferredTypeId(entry.MaybeGetReference(DW_AT_containing_type));
457     const Id pointee_type_id = GetReferredTypeId(MaybeGetReferredType(entry));
458     AddProcessedNode<PointerToMember>(entry, containing_type_id,
459                                       pointee_type_id);
460   }
461 
ProcessUnspecifiedType(Entry & entry)462   void ProcessUnspecifiedType(Entry& entry) {
463     const std::string type_name =  GetName(entry);
464     Check(type_name == "decltype(nullptr)")
465         << "Unsupported DW_TAG_unspecified_type: " << type_name;
466     AddProcessedNode<Special>(entry, Special::Kind::NULLPTR);
467   }
468 
ShouldKeepDefinition(Entry & entry,const std::string & name) const469   bool ShouldKeepDefinition(Entry& entry, const std::string& name) const {
470     if (!HasIncompleteTypes(language_) || file_filter_ == nullptr) {
471       return true;
472     }
473     const auto file = files_.MaybeGetFile(entry, DW_AT_decl_file);
474     if (!file) {
475       // Built in types that do not have DW_AT_decl_file should be preserved.
476       static constexpr std::string_view kBuiltinPrefix = "__";
477       if (name.starts_with(kBuiltinPrefix)) {
478         return true;
479       }
480       Die() << "File filter is provided, but " << name << " ("
481             << EntryToString(entry) << ") doesn't have DW_AT_decl_file";
482     }
483     return (*file_filter_)(*file);
484   }
485 
ProcessStructUnion(Entry & entry,StructUnion::Kind kind)486   void ProcessStructUnion(Entry& entry, StructUnion::Kind kind) {
487     const auto type_name = GetNameOrEmpty(entry);
488     const auto full_name =
489         type_name.empty() ? type_name : scope_.name + type_name;
490     const PushScopeName push_scope_name(scope_, kind, type_name);
491 
492     std::vector<Id> base_classes;
493     std::vector<Id> members;
494     std::vector<Id> methods;
495     std::optional<VariantAndMembers> variant_and_members = std::nullopt;
496 
497     for (auto& child : entry.GetChildren()) {
498       auto child_tag = child.GetTag();
499       // All possible children of struct/class/union
500       switch (child_tag) {
501         case DW_TAG_member:
502           if (child.GetFlag(DW_AT_external)) {
503             // static members are interpreted as variables and not included in
504             // members.
505             ProcessVariable(child);
506           } else {
507             members.push_back(GetIdForEntry(child));
508             ProcessMember(child);
509           }
510           break;
511         case DW_TAG_subprogram:
512           ProcessMethod(methods, child);
513           break;
514         case DW_TAG_inheritance:
515           base_classes.push_back(GetIdForEntry(child));
516           ProcessBaseClass(child);
517           break;
518         case DW_TAG_structure_type:
519         case DW_TAG_class_type:
520         case DW_TAG_union_type:
521         case DW_TAG_enumeration_type:
522         case DW_TAG_typedef:
523         case DW_TAG_const_type:
524         case DW_TAG_volatile_type:
525         case DW_TAG_restrict_type:
526         case DW_TAG_atomic_type:
527         case DW_TAG_array_type:
528         case DW_TAG_pointer_type:
529         case DW_TAG_reference_type:
530         case DW_TAG_rvalue_reference_type:
531         case DW_TAG_ptr_to_member_type:
532         case DW_TAG_unspecified_type:
533         case DW_TAG_variable:
534           Process(child);
535           break;
536         case DW_TAG_imported_declaration:
537         case DW_TAG_imported_module:
538           // For now information there is useless for ABI monitoring, but we
539           // need to check that there is no missing information in descendants.
540           CheckNoChildren(child);
541           break;
542         case DW_TAG_template_type_parameter:
543         case DW_TAG_template_value_parameter:
544         case DW_TAG_GNU_template_template_param:
545         case DW_TAG_GNU_template_parameter_pack:
546           // We just skip these as neither GCC nor Clang seem to use them
547           // properly (resulting in no references to such DIEs).
548           break;
549         case DW_TAG_variant_part:
550           if (full_name.empty()) {
551             Die() << "Variant name should not be empty: "
552                   << EntryToString(entry);
553           }
554           variant_and_members = GetVariantAndMembers(child);
555           break;
556         default:
557           Die() << "Unexpected tag for child of struct/class/union: "
558                 << Hex(child_tag) << ", " << EntryToString(child);
559       }
560     }
561 
562     if (variant_and_members.has_value()) {
563       // Add a Variant node since this entry represents a variant rather than a
564       // struct or union.
565       const Id id =
566           AddProcessedNode<Variant>(entry, full_name, GetByteSize(entry),
567                                     variant_and_members->discriminant,
568                                     std::move(variant_and_members->members));
569       AddNamedTypeNode(id);
570       return;
571     }
572 
573     if (entry.GetFlag(DW_AT_declaration) ||
574         !ShouldKeepDefinition(entry, type_name)) {
575       // Declaration may have partial information about members or method.
576       // We only need to parse children for information that will be needed in
577       // complete definition, but don't need to store them in incomplete node.
578       AddProcessedNode<StructUnion>(entry, kind, full_name);
579       return;
580     }
581 
582     const auto byte_size = GetByteSize(entry);
583 
584     const Id id = AddProcessedNode<StructUnion>(
585         entry, kind, full_name, byte_size, std::move(base_classes),
586         std::move(methods), std::move(members));
587     if (!full_name.empty()) {
588       AddNamedTypeNode(id);
589     }
590   }
591 
ProcessVariantMember(Entry & entry)592   void ProcessVariantMember(Entry& entry) {
593     // TODO: Process signed discriminant values.
594     auto dw_discriminant_value =
595         entry.MaybeGetUnsignedConstant(DW_AT_discr_value);
596     auto discriminant_value =
597         dw_discriminant_value
598             ? std::optional(static_cast<int64_t>(*dw_discriminant_value))
599             : std::nullopt;
600 
601     auto children = entry.GetChildren();
602     if (children.size() != 1) {
603       Die() << "Unexpected number of children for variant member: "
604             << EntryToString(entry);
605     }
606 
607     auto child = children[0];
608     if (child.GetTag() != DW_TAG_member) {
609       Die() << "Unexpected tag for variant member child: "
610             << Hex(child.GetTag()) << ", " << EntryToString(child);
611     }
612     if (GetDataBitOffset(child, 0, is_little_endian_binary_) != 0) {
613       Die() << "Unexpected data member location for variant member: "
614             << EntryToString(child);
615     }
616 
617     const std::string name = GetNameOrEmpty(child);
618     auto referred_type_id = GetReferredTypeId(GetReferredType(child));
619     AddProcessedNode<VariantMember>(entry, name, discriminant_value,
620                                     referred_type_id);
621   }
622 
ProcessMember(Entry & entry)623   void ProcessMember(Entry& entry) {
624     const auto name = GetNameOrEmpty(entry);
625     auto referred_type = GetReferredType(entry);
626     const Id referred_type_id = GetIdForEntry(referred_type);
627     auto optional_bit_size = entry.MaybeGetUnsignedConstant(DW_AT_bit_size);
628     // Member has DW_AT_bit_size if and only if it is bit field.
629     // STG uses bit_size == 0 to mark that the member is not a bit field.
630     Check(!optional_bit_size || *optional_bit_size > 0)
631         << "DW_AT_bit_size should be a positive number";
632     auto bit_size = optional_bit_size ? *optional_bit_size : 0;
633     AddProcessedNode<Member>(
634         entry, std::move(name), referred_type_id,
635         GetDataBitOffset(entry, bit_size, is_little_endian_binary_), bit_size);
636   }
637 
ProcessMethod(std::vector<Id> & methods,Entry & entry)638   void ProcessMethod(std::vector<Id>& methods, Entry& entry) {
639     Subprogram subprogram = GetSubprogram(entry);
640     auto id = maker_.Add<Function>(std::move(subprogram.node));
641     if (subprogram.external && subprogram.address) {
642       // Only external functions with address are useful for ABI monitoring
643       // TODO: cover virtual methods
644       const auto new_symbol_idx = result_.symbols.size();
645       result_.symbols.push_back(Types::Symbol{
646           .scoped_name = GetScopedNameForSymbol(
647               new_symbol_idx, subprogram.name_with_context),
648           .linkage_name = subprogram.linkage_name,
649           .address = *subprogram.address,
650           .type_id = id});
651     }
652     const auto virtuality = entry.MaybeGetUnsignedConstant(DW_AT_virtuality)
653                                  .value_or(DW_VIRTUALITY_none);
654     if (virtuality == DW_VIRTUALITY_virtual ||
655         virtuality == DW_VIRTUALITY_pure_virtual) {
656       if (!subprogram.name_with_context.unscoped_name) {
657         Die() << "Method " << EntryToString(entry) << " should have name";
658       }
659       if (subprogram.name_with_context.specification) {
660         Die() << "Method " << EntryToString(entry)
661               << " shouldn't have specification";
662       }
663       const auto vtable_offset = entry.MaybeGetVtableOffset().value_or(0);
664       methods.push_back(AddProcessedNode<Method>(
665           entry, subprogram.linkage_name,
666           *subprogram.name_with_context.unscoped_name, vtable_offset, id));
667     }
668   }
669 
ProcessBaseClass(Entry & entry)670   void ProcessBaseClass(Entry& entry) {
671     const Id type_id = GetReferredTypeId(GetReferredType(entry));
672     const auto byte_offset = entry.MaybeGetMemberByteOffset();
673     if (!byte_offset) {
674       Die() << "No offset found for base class " << EntryToString(entry);
675     }
676     const auto bit_offset = *byte_offset * 8;
677     const auto virtuality = entry.MaybeGetUnsignedConstant(DW_AT_virtuality)
678                                  .value_or(DW_VIRTUALITY_none);
679     BaseClass::Inheritance inheritance;
680     if (virtuality == DW_VIRTUALITY_none) {
681       inheritance = BaseClass::Inheritance::NON_VIRTUAL;
682     } else if (virtuality == DW_VIRTUALITY_virtual) {
683       inheritance = BaseClass::Inheritance::VIRTUAL;
684     } else {
685       Die() << "Unexpected base class virtuality: " << virtuality;
686     }
687     AddProcessedNode<BaseClass>(entry, type_id, bit_offset, inheritance);
688   }
689 
ProcessArray(Entry & entry)690   void ProcessArray(Entry& entry) {
691     auto referred_type = GetReferredType(entry);
692     Id referred_type_id = GetIdForEntry(referred_type);
693     auto children = entry.GetChildren();
694     // Multiple children in array describe multiple dimensions of this array.
695     // For example, int[M][N] contains two children, M located in the first
696     // child, N located in the second child. But in STG multidimensional arrays
697     // are represented as chain of arrays: int[M][N] is array[M] of array[N] of
698     // int.
699     //
700     // We need to chain children as types together in reversed order.
701     // "referred_type_id" is updated every time to contain the top element in
702     // the chain. Rightmost chldren refers to the original "referred_type_id".
703     for (auto it = children.rbegin(); it != children.rend(); ++it) {
704       auto& child = *it;
705       // All subarrays except the first (last in the reversed order) are
706       // attached to the corresponding child. First subarray (last in the
707       // reversed order) is attached to the original entry itself.
708       auto& entry_to_attach = (it + 1 == children.rend()) ? entry : child;
709       // Update referred_type_id so next array in chain points there.
710       referred_type_id = AddProcessedNode<Array>(
711           entry_to_attach, GetNumberOfElements(child), referred_type_id);
712     }
713   }
714 
ProcessEnum(Entry & entry)715   void ProcessEnum(Entry& entry) {
716     const auto type_name = GetNameOrEmpty(entry);
717     const auto full_name =
718         type_name.empty() ? type_name : scope_.name + type_name;
719 
720     if (entry.GetFlag(DW_AT_declaration)) {
721       // It is expected to have only name and no children in declaration.
722       // However, it is not guaranteed and we should do something if we find an
723       // example.
724       CheckNoChildren(entry);
725       AddProcessedNode<Enumeration>(entry, full_name);
726       return;
727     }
728     const Id underlying_type_id =
729         GetReferredTypeId(MaybeGetReferredType(entry));
730     auto children = entry.GetChildren();
731     Enumeration::Enumerators enumerators;
732     enumerators.reserve(children.size());
733     for (auto& child : children) {
734       auto child_tag = child.GetTag();
735       switch (child_tag) {
736         case DW_TAG_enumerator: {
737           const std::string enumerator_name = GetName(child);
738           // TODO: detect signedness of underlying type and call
739           // an appropriate method.
740           std::optional<size_t> value_optional =
741               child.MaybeGetUnsignedConstant(DW_AT_const_value);
742           Check(value_optional.has_value()) << "Enumerator should have value";
743           // TODO: support both uint64_t and int64_t, depending on
744           // signedness of underlying type.
745           enumerators.emplace_back(enumerator_name,
746                                    static_cast<int64_t>(*value_optional));
747           break;
748         }
749         case DW_TAG_subprogram:
750           // STG does not support virtual methods for enums.
751           Check(child.MaybeGetUnsignedConstant(DW_AT_virtuality)
752                     .value_or(DW_VIRTUALITY_none) == DW_VIRTUALITY_none)
753               << "Enums can not have virtual methods: " << EntryToString(child);
754           ProcessFunction(child);
755           break;
756         default:
757           Die() << "Unexpected tag for child of enum: " << Hex(child_tag)
758                 << ", " << EntryToString(child);
759       }
760     }
761     if (!ShouldKeepDefinition(entry, type_name)) {
762       AddProcessedNode<Enumeration>(entry, full_name);
763       return;
764     }
765     const Id id = AddProcessedNode<Enumeration>(
766         entry, full_name, underlying_type_id, std::move(enumerators));
767     if (!full_name.empty()) {
768       AddNamedTypeNode(id);
769     }
770   }
771 
772   struct VariantAndMembers {
773     std::optional<Id> discriminant;
774     std::vector<Id> members;
775   };
776 
GetVariantAndMembers(Entry & entry)777   VariantAndMembers GetVariantAndMembers(Entry& entry) {
778     std::vector<Id> members;
779     std::optional<Id> discriminant = std::nullopt;
780     auto discriminant_entry = entry.MaybeGetReference(DW_AT_discr);
781     if (discriminant_entry.has_value()) {
782       discriminant = GetIdForEntry(*discriminant_entry);
783       ProcessMember(*discriminant_entry);
784     }
785 
786     for (auto& child : entry.GetChildren()) {
787       auto child_tag = child.GetTag();
788       switch (child_tag) {
789         case DW_TAG_member: {
790           if (child.GetOffset() != discriminant_entry->GetOffset()) {
791             Die() << "Encountered rogue member for variant: "
792                   << EntryToString(entry);
793           }
794           if (!child.GetFlag(DW_AT_artificial)) {
795             Die() << "Variant discriminant must be an artificial member: "
796                   << EntryToString(child);
797           }
798           break;
799         }
800         case DW_TAG_variant:
801           members.push_back(GetIdForEntry(child));
802           ProcessVariantMember(child);
803           break;
804         default:
805           Die() << "Unexpected tag for child of variant: " << Hex(child_tag)
806                 << ", " << EntryToString(child);
807       }
808     }
809     return VariantAndMembers{.discriminant = discriminant,
810                              .members = std::move(members)};
811   }
812 
813   struct NameWithContext {
814     std::optional<Dwarf_Off> specification;
815     std::optional<std::string> unscoped_name;
816     std::optional<std::string> scoped_name;
817   };
818 
GetNameWithContext(Entry & entry)819   NameWithContext GetNameWithContext(Entry& entry) {
820     NameWithContext result;
821     // Leaf of specification tree is usually a declaration (of a function or a
822     // method). Then goes definition, which references declaration by
823     // DW_AT_specification. And on top we have instantiation, which references
824     // definition by DW_AT_abstract_origin. In the worst case we have:
825     // * instantiation
826     //     >-DW_AT_abstract_origin-> definition
827     //         >-DW_AT_specification-> declaration
828     //
829     // By using attribute integration we fold all information from definition to
830     // instantiation, flattening hierarchy:
831     // * instantiation + definition
832     //     >-DW_AT_specification-> declaration
833     // NB: DW_AT_abstract_origin attribute is also visible, but it should be
834     // ignored, since we already used it during integration.
835     //
836     // We also need to support this case, when we don't have separate
837     // declaration:
838     // * instantiation +
839     //     >-DW_AT_abstract_origin -> definition
840     //
841     // So the final algorithm is to get final DW_AT_specification through the
842     // whole chain, or use DW_AT_abstract_origin if there is no
843     // DW_AT_specification.
844     if (auto specification = entry.MaybeGetReference(DW_AT_specification)) {
845       result.specification = specification->GetOffset();
846     } else if (auto abstract_origin =
847                    entry.MaybeGetReference(DW_AT_abstract_origin)) {
848       result.specification = abstract_origin->GetOffset();
849     }
850     result.unscoped_name = entry.MaybeGetDirectString(DW_AT_name);
851     if (!result.unscoped_name && !result.specification) {
852       // If there is no name and specification, then this entry is anonymous.
853       // Anonymous entries are modelled as the empty string and not nullopt.
854       // This allows us to fill and register scoped_name (also empty string) to
855       // be used in references.
856       result.unscoped_name = std::string();
857     }
858     if (result.unscoped_name) {
859       result.scoped_name = scope_.name + *result.unscoped_name;
860       scoped_names_.emplace_back(
861           entry.GetOffset(), *result.scoped_name);
862     }
863     return result;
864   }
865 
GetScopedNameForSymbol(size_t symbol_idx,const NameWithContext & name)866   std::string GetScopedNameForSymbol(size_t symbol_idx,
867                                      const NameWithContext& name) {
868     // This method is designed to resolve this topology:
869     //   A: specification=B
870     //   B: name="foo"
871     // Any other topologies are rejected:
872     //   * Name and specification in one DIE: checked right below.
873     //   * Chain of specifications will result in symbol referencing another
874     //     specification, which will not be in scoped_names_, because "name and
875     //     specification in one DIE" is rejected.
876     if (name.scoped_name) {
877       if (name.specification) {
878         Die() << "Entry has name " << *name.scoped_name
879               << " and specification " << Hex(*name.specification);
880       }
881       return *name.scoped_name;
882     }
883     if (name.specification) {
884       unresolved_symbol_specifications_.emplace_back(*name.specification,
885                                                      symbol_idx);
886       // Name will be filled in ResolveSymbolSpecifications
887       return {};
888     }
889     Die() << "Entry should have either name or specification";
890   }
891 
ProcessVariable(Entry & entry)892   void ProcessVariable(Entry& entry) {
893     auto name_with_context = GetNameWithContext(entry);
894 
895     auto referred_type = GetReferredType(entry);
896     const Id referred_type_id = GetIdForEntry(referred_type);
897 
898     if (auto address = entry.MaybeGetAddress(DW_AT_location)) {
899       // Only external variables with address are useful for ABI monitoring
900       const auto new_symbol_idx = result_.symbols.size();
901       result_.symbols.push_back(Types::Symbol{
902           .scoped_name = GetScopedNameForSymbol(
903               new_symbol_idx, name_with_context),
904           .linkage_name = GetLinkageName(version_, entry),
905           .address = *address,
906           .type_id = referred_type_id});
907     }
908   }
909 
ProcessFunction(Entry & entry)910   void ProcessFunction(Entry& entry) {
911     Subprogram subprogram = GetSubprogram(entry);
912     const Id id = AddProcessedNode<Function>(entry, std::move(subprogram.node));
913     if (subprogram.external && subprogram.address) {
914       // Only external functions with address are useful for ABI monitoring
915       const auto new_symbol_idx = result_.symbols.size();
916       result_.symbols.push_back(Types::Symbol{
917           .scoped_name = GetScopedNameForSymbol(
918               new_symbol_idx, subprogram.name_with_context),
919           .linkage_name = std::move(subprogram.linkage_name),
920           .address = *subprogram.address,
921           .type_id = id});
922     }
923   }
924 
925   struct Subprogram {
926     Function node;
927     NameWithContext name_with_context;
928     std::string linkage_name;
929     std::optional<Address> address;
930     bool external;
931   };
932 
GetSubprogram(Entry & entry)933   Subprogram GetSubprogram(Entry& entry) {
934     const Id return_type_id = GetReferredTypeId(MaybeGetReferredType(entry));
935 
936     std::vector<Id> parameters;
937     for (auto& child : entry.GetChildren()) {
938       auto child_tag = child.GetTag();
939       switch (child_tag) {
940         case DW_TAG_formal_parameter:
941           parameters.push_back(GetReferredTypeId(GetReferredType(child)));
942           break;
943         case DW_TAG_unspecified_parameters:
944           // Note: C++ allows a single ... argument specification but C does
945           // not. However, "extern int foo();" (note lack of "void" in
946           // parameters) in C will produce the same DWARF as "extern int
947           // foo(...);" in C++.
948           CheckNoChildren(child);
949           parameters.push_back(variadic_id_);
950           break;
951         case DW_TAG_enumeration_type:
952         case DW_TAG_label:
953         case DW_TAG_lexical_block:
954         case DW_TAG_structure_type:
955         case DW_TAG_class_type:
956         case DW_TAG_union_type:
957         case DW_TAG_typedef:
958         case DW_TAG_const_type:
959         case DW_TAG_volatile_type:
960         case DW_TAG_restrict_type:
961         case DW_TAG_atomic_type:
962         case DW_TAG_array_type:
963         case DW_TAG_pointer_type:
964         case DW_TAG_reference_type:
965         case DW_TAG_rvalue_reference_type:
966         case DW_TAG_ptr_to_member_type:
967         case DW_TAG_unspecified_type:
968         case DW_TAG_inlined_subroutine:
969         case DW_TAG_subprogram:
970         case DW_TAG_variable:
971         case DW_TAG_call_site:
972         case DW_TAG_GNU_call_site:
973           // TODO: Do not leak local types outside this scope.
974           // TODO: It would be better to not process any
975           // information that is function local but there is a dangling
976           // reference Clang bug.
977           Process(child);
978           break;
979         case DW_TAG_imported_declaration:
980         case DW_TAG_imported_module:
981           // For now information there is useless for ABI monitoring, but we
982           // need to check that there is no missing information in descendants.
983           CheckNoChildren(child);
984           break;
985         case DW_TAG_template_type_parameter:
986         case DW_TAG_template_value_parameter:
987         case DW_TAG_GNU_template_template_param:
988         case DW_TAG_GNU_template_parameter_pack:
989           // We just skip these as neither GCC nor Clang seem to use them
990           // properly (resulting in no references to such DIEs).
991           break;
992         case DW_TAG_GNU_formal_parameter_pack:
993           // https://wiki.dwarfstd.org/C++0x_Variadic_templates.md
994           //
995           // As per this (rejected) proposal, GCC includes parameters as
996           // children of this DIE.
997           for (auto& child2 : child.GetChildren()) {
998             if (child2.GetTag() == DW_TAG_formal_parameter) {
999               parameters.push_back(GetReferredTypeId(GetReferredType(child2)));
1000             }
1001           }
1002           break;
1003         default:
1004           Die() << "Unexpected tag for child of function: " << Hex(child_tag)
1005                 << ", " << EntryToString(child);
1006       }
1007     }
1008 
1009     return Subprogram{.node = Function(return_type_id, parameters),
1010                       .name_with_context = GetNameWithContext(entry),
1011                       .linkage_name = GetLinkageName(version_, entry),
1012                       .address = entry.MaybeGetAddress(DW_AT_low_pc),
1013                       .external = entry.GetFlag(DW_AT_external)};
1014   }
1015 
1016   // Allocate or get already allocated STG Id for Entry.
GetIdForEntry(Entry & entry)1017   Id GetIdForEntry(Entry& entry) {
1018     return maker_.Get(Hex(entry.GetOffset()));
1019   }
1020 
1021   // Same as GetIdForEntry, but returns "void_id_" for "unspecified" references,
1022   // because it is normal for DWARF (5.2 Unspecified Type Entries).
GetReferredTypeId(std::optional<Entry> referred_type)1023   Id GetReferredTypeId(std::optional<Entry> referred_type) {
1024     return referred_type ? GetIdForEntry(*referred_type) : void_id_;
1025   }
1026 
1027   // Wrapper for GetIdForEntry to allow lvalues.
GetReferredTypeId(Entry referred_type)1028   Id GetReferredTypeId(Entry referred_type) {
1029     return GetIdForEntry(referred_type);
1030   }
1031 
1032   // Populate Id from method above with processed Node.
1033   template <typename Node, typename... Args>
AddProcessedNode(Entry & entry,Args &&...args)1034   Id AddProcessedNode(Entry& entry, Args&&... args) {
1035     return maker_.Set<Node>(Hex(entry.GetOffset()),
1036                             std::forward<Args>(args)...);
1037   }
1038 
AddNamedTypeNode(Id id)1039   void AddNamedTypeNode(Id id) {
1040     if (scope_.named) {
1041       result_.named_type_ids.push_back(id);
1042     }
1043   }
1044 
1045   Maker<Hex<Dwarf_Off>> maker_;
1046   Id void_id_;
1047   Id variadic_id_;
1048   bool is_little_endian_binary_;
1049   const std::unique_ptr<Filter>& file_filter_;
1050   Types& result_;
1051   std::vector<std::pair<Dwarf_Off, std::string>> scoped_names_;
1052   std::vector<std::pair<Dwarf_Off, size_t>> unresolved_symbol_specifications_;
1053 
1054   // Current scope.
1055   Scope scope_;
1056   int version_;
1057   dwarf::Files files_;
1058   uint64_t language_;
1059 };
1060 
Process(Dwarf * dwarf,bool is_little_endian_binary,const std::unique_ptr<Filter> & file_filter,Graph & graph)1061 Types Process(Dwarf* dwarf, bool is_little_endian_binary,
1062               const std::unique_ptr<Filter>& file_filter, Graph& graph) {
1063   Types result;
1064 
1065   if (dwarf == nullptr) {
1066     return result;
1067   }
1068 
1069   const Id void_id = graph.Add<Special>(Special::Kind::VOID);
1070   const Id variadic_id = graph.Add<Special>(Special::Kind::VARIADIC);
1071   // TODO: Scope Processor to compilation units?
1072   Processor processor(graph, void_id, variadic_id, is_little_endian_binary,
1073                       file_filter, result);
1074   for (auto& compilation_unit : GetCompilationUnits(*dwarf)) {
1075     // Could fetch top-level attributes like compiler here.
1076     processor.ProcessCompilationUnit(compilation_unit);
1077   }
1078   processor.ResolveSymbolSpecifications();
1079 
1080   return result;
1081 }
1082 
1083 }  // namespace dwarf
1084 }  // namespace stg
1085