1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- mode: C++ -*-
3 //
4 // Copyright 2022-2023 Google LLC
5 //
6 // Licensed under the Apache License v2.0 with LLVM Exceptions (the
7 // "License"); you may not use this file except in compliance with the
8 // License. You may obtain a copy of the License at
9 //
10 // https://llvm.org/LICENSE.txt
11 //
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 // Author: Aleksei Vetrov
19
20 #include "dwarf_processor.h"
21
22 #include <dwarf.h>
23 #include <elfutils/libdw.h>
24
25 #include <algorithm>
26 #include <cstddef>
27 #include <cstdint>
28 #include <memory>
29 #include <optional>
30 #include <sstream>
31 #include <string>
32 #include <string_view>
33 #include <utility>
34 #include <vector>
35
36 #include "dwarf_wrappers.h"
37 #include "error.h"
38 #include "filter.h"
39 #include "hex.h"
40 #include "graph.h"
41 #include "scope.h"
42
43 namespace stg {
44 namespace dwarf {
45
46 namespace {
47
HasIncompleteTypes(uint64_t language)48 bool HasIncompleteTypes(uint64_t language) {
49 return language != DW_LANG_Rust;
50 }
51
EntryToString(Entry & entry)52 std::string EntryToString(Entry& entry) {
53 std::ostringstream os;
54 os << "DWARF entry <" << Hex(entry.GetOffset()) << ">";
55 return os.str();
56 }
57
MaybeGetName(Entry & entry)58 std::optional<std::string> MaybeGetName(Entry& entry) {
59 return entry.MaybeGetString(DW_AT_name);
60 }
61
GetName(Entry & entry)62 std::string GetName(Entry& entry) {
63 auto result = MaybeGetName(entry);
64 if (!result.has_value()) {
65 Die() << "Name was not found for " << EntryToString(entry);
66 }
67 return std::move(*result);
68 }
69
GetNameOrEmpty(Entry & entry)70 std::string GetNameOrEmpty(Entry& entry) {
71 auto result = MaybeGetName(entry);
72 if (!result.has_value()) {
73 return {};
74 }
75 return std::move(*result);
76 }
77
GetLinkageName(int version,Entry & entry)78 std::string GetLinkageName(int version, Entry& entry) {
79 auto linkage_name = entry.MaybeGetString(
80 version < 4 ? DW_AT_MIPS_linkage_name : DW_AT_linkage_name);
81 if (linkage_name.has_value()) {
82 return std::move(*linkage_name);
83 }
84 return GetNameOrEmpty(entry);
85 }
86
GetBitSize(Entry & entry)87 size_t GetBitSize(Entry& entry) {
88 if (auto byte_size = entry.MaybeGetUnsignedConstant(DW_AT_byte_size)) {
89 return *byte_size * 8;
90 } else if (auto bit_size = entry.MaybeGetUnsignedConstant(DW_AT_bit_size)) {
91 return *bit_size;
92 }
93 Die() << "Bit size was not found for " << EntryToString(entry);
94 }
95
GetByteSize(Entry & entry)96 size_t GetByteSize(Entry& entry) {
97 if (auto byte_size = entry.MaybeGetUnsignedConstant(DW_AT_byte_size)) {
98 return *byte_size;
99 } else if (auto bit_size = entry.MaybeGetUnsignedConstant(DW_AT_bit_size)) {
100 // Round up bit_size / 8 to get minimal needed storage size in bytes.
101 return (*bit_size + 7) / 8;
102 }
103 Die() << "Byte size was not found for " << EntryToString(entry);
104 }
105
GetEncoding(Entry & entry)106 Primitive::Encoding GetEncoding(Entry& entry) {
107 const auto dwarf_encoding = entry.MaybeGetUnsignedConstant(DW_AT_encoding);
108 if (!dwarf_encoding) {
109 Die() << "Encoding was not found for " << EntryToString(entry);
110 }
111 switch (*dwarf_encoding) {
112 case DW_ATE_boolean:
113 return Primitive::Encoding::BOOLEAN;
114 case DW_ATE_complex_float:
115 return Primitive::Encoding::COMPLEX_NUMBER;
116 case DW_ATE_float:
117 return Primitive::Encoding::REAL_NUMBER;
118 case DW_ATE_signed:
119 return Primitive::Encoding::SIGNED_INTEGER;
120 case DW_ATE_signed_char:
121 return Primitive::Encoding::SIGNED_CHARACTER;
122 case DW_ATE_unsigned:
123 return Primitive::Encoding::UNSIGNED_INTEGER;
124 case DW_ATE_unsigned_char:
125 return Primitive::Encoding::UNSIGNED_CHARACTER;
126 case DW_ATE_UTF:
127 return Primitive::Encoding::UTF;
128 default:
129 Die() << "Unknown encoding " << Hex(*dwarf_encoding) << " for "
130 << EntryToString(entry);
131 }
132 }
133
MaybeGetReferredType(Entry & entry)134 std::optional<Entry> MaybeGetReferredType(Entry& entry) {
135 return entry.MaybeGetReference(DW_AT_type);
136 }
137
GetReferredType(Entry & entry)138 Entry GetReferredType(Entry& entry) {
139 auto result = MaybeGetReferredType(entry);
140 if (!result.has_value()) {
141 Die() << "Type reference was not found in " << EntryToString(entry);
142 }
143 return *result;
144 }
145
GetNumberOfElements(Entry & entry)146 size_t GetNumberOfElements(Entry& entry) {
147 // DWARF standard says, that array dimensions could be an entry with
148 // either DW_TAG_subrange_type or DW_TAG_enumeration_type. However, this
149 // code supports only the DW_TAG_subrange_type.
150 Check(entry.GetTag() == DW_TAG_subrange_type)
151 << "Array's dimensions should be an entry of DW_TAG_subrange_type";
152 std::optional<size_t> number_of_elements = entry.MaybeGetCount();
153 if (number_of_elements) {
154 return *number_of_elements;
155 }
156 // If a subrange has no DW_AT_count and no DW_AT_upper_bound attribute, its
157 // size is unknown.
158 return 0;
159 }
160
161 // Calculate number of bits from the "beginning" of the containing entity to
162 // the "beginning" of the data member using DW_AT_bit_offset.
163 //
164 // "Number of bits from the beginning", depends on the definition of the
165 // "beginning", which is different for big- and little-endian architectures.
166 // However, DW_AT_bit_offset is defined from the high order bit of the storage
167 // unit to the high order bit of a field and is the same for both architectures.
168
169 // So this function converts DW_AT_bit_offset to the "number of bits from the
170 // beginning".
CalculateBitfieldAdjustment(Entry & entry,size_t bit_size,bool is_little_endian_binary)171 size_t CalculateBitfieldAdjustment(Entry& entry, size_t bit_size,
172 bool is_little_endian_binary) {
173 if (bit_size == 0) {
174 // bit_size == 0 marks that it is not a bit field. No adjustment needed.
175 return 0;
176 }
177 auto container_byte_size = entry.MaybeGetUnsignedConstant(DW_AT_byte_size);
178 auto bit_offset = entry.MaybeGetUnsignedConstant(DW_AT_bit_offset);
179 Check(container_byte_size.has_value() && bit_offset.has_value())
180 << "If member offset is defined as DW_AT_data_member_location, bit field "
181 "should have DW_AT_byte_size and DW_AT_bit_offset";
182 // The following structure will be used as an example in the explanations:
183 // struct foo {
184 // uint16_t rest_of_the_struct;
185 // uint16_t x : 5;
186 // uint16_t y : 6;
187 // uint16_t z : 5;
188 // };
189 if (is_little_endian_binary) {
190 // Compiler usualy packs bit fields starting with the least significant
191 // bits, but DW_AT_bit_offset is counted from high to low bits:
192 //
193 // rest of the struct|< container >
194 // Container bits: 01234|56789A|BCDEF
195 // Bit-fields' bits: 01234|012345|01234
196 // bit_offset: <<<<B<<<<<<5<<<<<0
197 // bits from start: 0>>>>>5>>>>>>B>>>>
198 // <x:5>|< y:6>|<z:5>
199 //
200 // x.bit_offset: 11 (0xB) bits
201 // y.bit_offset: 5 bits
202 // z.bit_offset: 0 bits
203 //
204 // So we need to subtract bit_offset from the container bit size
205 // (container_byte_size * 8) to inverse direction. Also we need to convert
206 // from high- to low-order bit, because the field "begins" with low-order
207 // bit. To do so we need to subtract field's bit size. Resulting formula is:
208 //
209 // container_byte_size * 8 - bit_offset - bit_size
210 //
211 // If we try it on example, we get correct values:
212 // x: 2 * 8 - 11 - 5 = 0
213 // y: 2 * 8 - 5 - 6 = 5
214 // z: 2 * 8 - 0 - 5 = 11 (0xB)
215 return *container_byte_size * 8 - *bit_offset - bit_size;
216 }
217 // Big-endian orders begins with high-order bit and the bit_offset is from the
218 // high order bit:
219 //
220 // rest of the struct|< container >
221 // Container bits: FEDCB|A98765|43210
222 // Bit-fields' bits: 43210|543210|43210
223 // bit_offset: 0>>>>>5>>>>>>B>>>>
224 // bits from start: 0>>>>>5>>>>>>B>>>>
225 // <x:5>|< y:6>|<z:5>
226 //
227 // So we just return bit_offset.
228 return *bit_offset;
229 }
230
231 // Calculate the number of bits from the beginning of the structure to the
232 // beginning of the data member.
GetDataBitOffset(Entry & entry,size_t bit_size,bool is_little_endian_binary)233 size_t GetDataBitOffset(Entry& entry, size_t bit_size,
234 bool is_little_endian_binary) {
235 // Offset may be represented either by DW_AT_data_bit_offset (in bits) or by
236 // DW_AT_data_member_location (in bytes).
237 if (auto data_bit_offset =
238 entry.MaybeGetUnsignedConstant(DW_AT_data_bit_offset)) {
239 // DW_AT_data_bit_offset contains what this function needs for any type
240 // of member (bitfield or not) on architecture of any endianness.
241 return *data_bit_offset;
242 } else if (auto byte_offset = entry.MaybeGetMemberByteOffset()) {
243 // DW_AT_data_member_location contains offset in bytes.
244 const size_t bit_offset = *byte_offset * 8;
245 // But there can be offset part, coming from DW_AT_bit_offset. DWARF 5
246 // standard requires to use DW_AT_data_bit_offset in this case, but a lot
247 // of binaries still use combination of DW_AT_data_member_location and
248 // DW_AT_bit_offset.
249 const size_t bitfield_adjusment =
250 CalculateBitfieldAdjustment(entry, bit_size, is_little_endian_binary);
251 return bit_offset + bitfield_adjusment;
252 } else {
253 // If the beginning of the data member is the same as the beginning of the
254 // containing entity then neither attribute is required.
255 return 0;
256 }
257 }
258
259 } // namespace
260
261 // Transforms DWARF entries to STG.
262 class Processor {
263 public:
// Initializes the processor's members from the arguments:
//   graph                   - used to construct maker_
//   void_id, variadic_id    - stored in void_id_ and variadic_id_
//   is_little_endian_binary - endianness flag, later passed to
//                             GetDataBitOffset
//   file_filter             - may be null; it is compared against nullptr
//                             before use
//   result                  - output that receives symbols and counters
Processor(Graph& graph, Id void_id, Id variadic_id,
          bool is_little_endian_binary,
          const std::unique_ptr<Filter>& file_filter, Types& result)
    : maker_(graph),
      void_id_(void_id),
      variadic_id_(variadic_id),
      is_little_endian_binary_(is_little_endian_binary),
      file_filter_(file_filter),
      result_(result) {
}
273
ProcessCompilationUnit(CompilationUnit & compilation_unit)274 void ProcessCompilationUnit(CompilationUnit& compilation_unit) {
275 version_ = compilation_unit.version;
276 if (file_filter_ != nullptr) {
277 files_ = dwarf::Files(compilation_unit.entry);
278 }
279 Process(compilation_unit.entry);
280 }
281
ResolveSymbolSpecifications()282 void ResolveSymbolSpecifications() {
283 std::sort(unresolved_symbol_specifications_.begin(),
284 unresolved_symbol_specifications_.end());
285 std::sort(scoped_names_.begin(), scoped_names_.end());
286 auto symbols_it = unresolved_symbol_specifications_.begin();
287 auto names_it = scoped_names_.begin();
288 while (symbols_it != unresolved_symbol_specifications_.end()) {
289 while (names_it != scoped_names_.end() &&
290 names_it->first < symbols_it->first) {
291 ++names_it;
292 }
293 if (names_it == scoped_names_.end() ||
294 names_it->first != symbols_it->first) {
295 Die() << "Scoped name not found for entry " << Hex(symbols_it->first);
296 }
297 result_.symbols[symbols_it->second].scoped_name = names_it->second;
298 ++symbols_it;
299 }
300 }
301
302 private:
Process(Entry & entry)303 void Process(Entry& entry) {
304 try {
305 return ProcessInternal(entry);
306 } catch (Exception& e) {
307 std::ostringstream os;
308 os << "processing DIE " << Hex(entry.GetOffset());
309 e.Add(os.str());
310 throw;
311 }
312 }
313
ProcessInternal(Entry & entry)314 void ProcessInternal(Entry& entry) {
315 ++result_.processed_entries;
316 const auto tag = entry.GetTag();
317 switch (tag) {
318 case DW_TAG_array_type:
319 ProcessArray(entry);
320 break;
321 case DW_TAG_enumeration_type:
322 ProcessEnum(entry);
323 break;
324 case DW_TAG_class_type:
325 ProcessStructUnion(entry, StructUnion::Kind::STRUCT);
326 break;
327 case DW_TAG_structure_type:
328 ProcessStructUnion(entry, StructUnion::Kind::STRUCT);
329 break;
330 case DW_TAG_union_type:
331 ProcessStructUnion(entry, StructUnion::Kind::UNION);
332 break;
333 case DW_TAG_member:
334 Die() << "DW_TAG_member outside of struct/class/union";
335 break;
336 case DW_TAG_pointer_type:
337 ProcessReference<PointerReference>(
338 entry, PointerReference::Kind::POINTER);
339 break;
340 case DW_TAG_reference_type:
341 ProcessReference<PointerReference>(
342 entry, PointerReference::Kind::LVALUE_REFERENCE);
343 break;
344 case DW_TAG_rvalue_reference_type:
345 ProcessReference<PointerReference>(
346 entry, PointerReference::Kind::RVALUE_REFERENCE);
347 break;
348 case DW_TAG_ptr_to_member_type:
349 ProcessPointerToMember(entry);
350 break;
351 case DW_TAG_unspecified_type:
352 ProcessUnspecifiedType(entry);
353 break;
354 case DW_TAG_compile_unit:
355 language_ = entry.MustGetUnsignedConstant(DW_AT_language);
356 ProcessAllChildren(entry);
357 break;
358 case DW_TAG_typedef:
359 ProcessTypedef(entry);
360 break;
361 case DW_TAG_base_type:
362 ProcessBaseType(entry);
363 break;
364 case DW_TAG_const_type:
365 ProcessReference<Qualified>(entry, Qualifier::CONST);
366 break;
367 case DW_TAG_volatile_type:
368 ProcessReference<Qualified>(entry, Qualifier::VOLATILE);
369 break;
370 case DW_TAG_restrict_type:
371 ProcessReference<Qualified>(entry, Qualifier::RESTRICT);
372 break;
373 case DW_TAG_atomic_type:
374 // TODO: test pending BTF / test suite support
375 ProcessReference<Qualified>(entry, Qualifier::ATOMIC);
376 break;
377 case DW_TAG_variable:
378 // Process only variables visible externally
379 if (entry.GetFlag(DW_AT_external)) {
380 ProcessVariable(entry);
381 }
382 break;
383 case DW_TAG_subroutine_type:
384 // Standalone function type, for example, used in function pointers.
385 ProcessFunction(entry);
386 break;
387 case DW_TAG_subprogram:
388 // DWARF equivalent of ELF function symbol.
389 ProcessFunction(entry);
390 break;
391 case DW_TAG_namespace:
392 ProcessNamespace(entry);
393 break;
394 case DW_TAG_lexical_block:
395 ProcessAllChildren(entry);
396 break;
397
398 default:
399 // TODO: die on unexpected tag, when this switch contains
400 // all expected tags
401 break;
402 }
403 }
404
ProcessAllChildren(Entry & entry)405 void ProcessAllChildren(Entry& entry) {
406 for (auto& child : entry.GetChildren()) {
407 Process(child);
408 }
409 }
410
CheckNoChildren(Entry & entry)411 void CheckNoChildren(Entry& entry) {
412 if (!entry.GetChildren().empty()) {
413 Die() << "Entry expected to have no children";
414 }
415 }
416
ProcessNamespace(Entry & entry)417 void ProcessNamespace(Entry& entry) {
418 const auto name = GetNameOrEmpty(entry);
419 const PushScopeName push_scope_name(scope_, "namespace", name);
420 ProcessAllChildren(entry);
421 }
422
ProcessBaseType(Entry & entry)423 void ProcessBaseType(Entry& entry) {
424 CheckNoChildren(entry);
425 const auto type_name = GetName(entry);
426 const size_t bit_size = GetBitSize(entry);
427 if (bit_size % 8) {
428 Die() << "type '" << type_name << "' size is not a multiple of 8";
429 }
430 const size_t byte_size = bit_size / 8;
431 AddProcessedNode<Primitive>(entry, type_name, GetEncoding(entry),
432 byte_size);
433 }
434
ProcessTypedef(Entry & entry)435 void ProcessTypedef(Entry& entry) {
436 const auto type_name = GetName(entry);
437 const auto full_name = scope_.name + type_name;
438 const Id referred_type_id = GetReferredTypeId(MaybeGetReferredType(entry));
439 const Id id = AddProcessedNode<Typedef>(entry, full_name, referred_type_id);
440 if (!ShouldKeepDefinition(entry, type_name)) {
441 // We always model (and keep) typedef definitions. But we should exclude
442 // filtered out types from being type roots.
443 return;
444 }
445 AddNamedTypeNode(id);
446 }
447
448 template<typename Node, typename KindType>
ProcessReference(Entry & entry,KindType kind)449 void ProcessReference(Entry& entry, KindType kind) {
450 const Id referred_type_id = GetReferredTypeId(MaybeGetReferredType(entry));
451 AddProcessedNode<Node>(entry, kind, referred_type_id);
452 }
453
ProcessPointerToMember(Entry & entry)454 void ProcessPointerToMember(Entry& entry) {
455 const Id containing_type_id =
456 GetReferredTypeId(entry.MaybeGetReference(DW_AT_containing_type));
457 const Id pointee_type_id = GetReferredTypeId(MaybeGetReferredType(entry));
458 AddProcessedNode<PointerToMember>(entry, containing_type_id,
459 pointee_type_id);
460 }
461
ProcessUnspecifiedType(Entry & entry)462 void ProcessUnspecifiedType(Entry& entry) {
463 const std::string type_name = GetName(entry);
464 Check(type_name == "decltype(nullptr)")
465 << "Unsupported DW_TAG_unspecified_type: " << type_name;
466 AddProcessedNode<Special>(entry, Special::Kind::NULLPTR);
467 }
468
ShouldKeepDefinition(Entry & entry,const std::string & name) const469 bool ShouldKeepDefinition(Entry& entry, const std::string& name) const {
470 if (!HasIncompleteTypes(language_) || file_filter_ == nullptr) {
471 return true;
472 }
473 const auto file = files_.MaybeGetFile(entry, DW_AT_decl_file);
474 if (!file) {
475 // Built in types that do not have DW_AT_decl_file should be preserved.
476 static constexpr std::string_view kBuiltinPrefix = "__";
477 if (name.starts_with(kBuiltinPrefix)) {
478 return true;
479 }
480 Die() << "File filter is provided, but " << name << " ("
481 << EntryToString(entry) << ") doesn't have DW_AT_decl_file";
482 }
483 return (*file_filter_)(*file);
484 }
485
ProcessStructUnion(Entry & entry,StructUnion::Kind kind)486 void ProcessStructUnion(Entry& entry, StructUnion::Kind kind) {
487 const auto type_name = GetNameOrEmpty(entry);
488 const auto full_name =
489 type_name.empty() ? type_name : scope_.name + type_name;
490 const PushScopeName push_scope_name(scope_, kind, type_name);
491
492 std::vector<Id> base_classes;
493 std::vector<Id> members;
494 std::vector<Id> methods;
495 std::optional<VariantAndMembers> variant_and_members = std::nullopt;
496
497 for (auto& child : entry.GetChildren()) {
498 auto child_tag = child.GetTag();
499 // All possible children of struct/class/union
500 switch (child_tag) {
501 case DW_TAG_member:
502 if (child.GetFlag(DW_AT_external)) {
503 // static members are interpreted as variables and not included in
504 // members.
505 ProcessVariable(child);
506 } else {
507 members.push_back(GetIdForEntry(child));
508 ProcessMember(child);
509 }
510 break;
511 case DW_TAG_subprogram:
512 ProcessMethod(methods, child);
513 break;
514 case DW_TAG_inheritance:
515 base_classes.push_back(GetIdForEntry(child));
516 ProcessBaseClass(child);
517 break;
518 case DW_TAG_structure_type:
519 case DW_TAG_class_type:
520 case DW_TAG_union_type:
521 case DW_TAG_enumeration_type:
522 case DW_TAG_typedef:
523 case DW_TAG_const_type:
524 case DW_TAG_volatile_type:
525 case DW_TAG_restrict_type:
526 case DW_TAG_atomic_type:
527 case DW_TAG_array_type:
528 case DW_TAG_pointer_type:
529 case DW_TAG_reference_type:
530 case DW_TAG_rvalue_reference_type:
531 case DW_TAG_ptr_to_member_type:
532 case DW_TAG_unspecified_type:
533 case DW_TAG_variable:
534 Process(child);
535 break;
536 case DW_TAG_imported_declaration:
537 case DW_TAG_imported_module:
538 // For now information there is useless for ABI monitoring, but we
539 // need to check that there is no missing information in descendants.
540 CheckNoChildren(child);
541 break;
542 case DW_TAG_template_type_parameter:
543 case DW_TAG_template_value_parameter:
544 case DW_TAG_GNU_template_template_param:
545 case DW_TAG_GNU_template_parameter_pack:
546 // We just skip these as neither GCC nor Clang seem to use them
547 // properly (resulting in no references to such DIEs).
548 break;
549 case DW_TAG_variant_part:
550 if (full_name.empty()) {
551 Die() << "Variant name should not be empty: "
552 << EntryToString(entry);
553 }
554 variant_and_members = GetVariantAndMembers(child);
555 break;
556 default:
557 Die() << "Unexpected tag for child of struct/class/union: "
558 << Hex(child_tag) << ", " << EntryToString(child);
559 }
560 }
561
562 if (variant_and_members.has_value()) {
563 // Add a Variant node since this entry represents a variant rather than a
564 // struct or union.
565 const Id id =
566 AddProcessedNode<Variant>(entry, full_name, GetByteSize(entry),
567 variant_and_members->discriminant,
568 std::move(variant_and_members->members));
569 AddNamedTypeNode(id);
570 return;
571 }
572
573 if (entry.GetFlag(DW_AT_declaration) ||
574 !ShouldKeepDefinition(entry, type_name)) {
575 // Declaration may have partial information about members or method.
576 // We only need to parse children for information that will be needed in
577 // complete definition, but don't need to store them in incomplete node.
578 AddProcessedNode<StructUnion>(entry, kind, full_name);
579 return;
580 }
581
582 const auto byte_size = GetByteSize(entry);
583
584 const Id id = AddProcessedNode<StructUnion>(
585 entry, kind, full_name, byte_size, std::move(base_classes),
586 std::move(methods), std::move(members));
587 if (!full_name.empty()) {
588 AddNamedTypeNode(id);
589 }
590 }
591
ProcessVariantMember(Entry & entry)592 void ProcessVariantMember(Entry& entry) {
593 // TODO: Process signed discriminant values.
594 auto dw_discriminant_value =
595 entry.MaybeGetUnsignedConstant(DW_AT_discr_value);
596 auto discriminant_value =
597 dw_discriminant_value
598 ? std::optional(static_cast<int64_t>(*dw_discriminant_value))
599 : std::nullopt;
600
601 auto children = entry.GetChildren();
602 if (children.size() != 1) {
603 Die() << "Unexpected number of children for variant member: "
604 << EntryToString(entry);
605 }
606
607 auto child = children[0];
608 if (child.GetTag() != DW_TAG_member) {
609 Die() << "Unexpected tag for variant member child: "
610 << Hex(child.GetTag()) << ", " << EntryToString(child);
611 }
612 if (GetDataBitOffset(child, 0, is_little_endian_binary_) != 0) {
613 Die() << "Unexpected data member location for variant member: "
614 << EntryToString(child);
615 }
616
617 const std::string name = GetNameOrEmpty(child);
618 auto referred_type_id = GetReferredTypeId(GetReferredType(child));
619 AddProcessedNode<VariantMember>(entry, name, discriminant_value,
620 referred_type_id);
621 }
622
ProcessMember(Entry & entry)623 void ProcessMember(Entry& entry) {
624 const auto name = GetNameOrEmpty(entry);
625 auto referred_type = GetReferredType(entry);
626 const Id referred_type_id = GetIdForEntry(referred_type);
627 auto optional_bit_size = entry.MaybeGetUnsignedConstant(DW_AT_bit_size);
628 // Member has DW_AT_bit_size if and only if it is bit field.
629 // STG uses bit_size == 0 to mark that the member is not a bit field.
630 Check(!optional_bit_size || *optional_bit_size > 0)
631 << "DW_AT_bit_size should be a positive number";
632 auto bit_size = optional_bit_size ? *optional_bit_size : 0;
633 AddProcessedNode<Member>(
634 entry, std::move(name), referred_type_id,
635 GetDataBitOffset(entry, bit_size, is_little_endian_binary_), bit_size);
636 }
637
ProcessMethod(std::vector<Id> & methods,Entry & entry)638 void ProcessMethod(std::vector<Id>& methods, Entry& entry) {
639 Subprogram subprogram = GetSubprogram(entry);
640 auto id = maker_.Add<Function>(std::move(subprogram.node));
641 if (subprogram.external && subprogram.address) {
642 // Only external functions with address are useful for ABI monitoring
643 // TODO: cover virtual methods
644 const auto new_symbol_idx = result_.symbols.size();
645 result_.symbols.push_back(Types::Symbol{
646 .scoped_name = GetScopedNameForSymbol(
647 new_symbol_idx, subprogram.name_with_context),
648 .linkage_name = subprogram.linkage_name,
649 .address = *subprogram.address,
650 .type_id = id});
651 }
652 const auto virtuality = entry.MaybeGetUnsignedConstant(DW_AT_virtuality)
653 .value_or(DW_VIRTUALITY_none);
654 if (virtuality == DW_VIRTUALITY_virtual ||
655 virtuality == DW_VIRTUALITY_pure_virtual) {
656 if (!subprogram.name_with_context.unscoped_name) {
657 Die() << "Method " << EntryToString(entry) << " should have name";
658 }
659 if (subprogram.name_with_context.specification) {
660 Die() << "Method " << EntryToString(entry)
661 << " shouldn't have specification";
662 }
663 const auto vtable_offset = entry.MaybeGetVtableOffset().value_or(0);
664 methods.push_back(AddProcessedNode<Method>(
665 entry, subprogram.linkage_name,
666 *subprogram.name_with_context.unscoped_name, vtable_offset, id));
667 }
668 }
669
ProcessBaseClass(Entry & entry)670 void ProcessBaseClass(Entry& entry) {
671 const Id type_id = GetReferredTypeId(GetReferredType(entry));
672 const auto byte_offset = entry.MaybeGetMemberByteOffset();
673 if (!byte_offset) {
674 Die() << "No offset found for base class " << EntryToString(entry);
675 }
676 const auto bit_offset = *byte_offset * 8;
677 const auto virtuality = entry.MaybeGetUnsignedConstant(DW_AT_virtuality)
678 .value_or(DW_VIRTUALITY_none);
679 BaseClass::Inheritance inheritance;
680 if (virtuality == DW_VIRTUALITY_none) {
681 inheritance = BaseClass::Inheritance::NON_VIRTUAL;
682 } else if (virtuality == DW_VIRTUALITY_virtual) {
683 inheritance = BaseClass::Inheritance::VIRTUAL;
684 } else {
685 Die() << "Unexpected base class virtuality: " << virtuality;
686 }
687 AddProcessedNode<BaseClass>(entry, type_id, bit_offset, inheritance);
688 }
689
ProcessArray(Entry & entry)690 void ProcessArray(Entry& entry) {
691 auto referred_type = GetReferredType(entry);
692 Id referred_type_id = GetIdForEntry(referred_type);
693 auto children = entry.GetChildren();
694 // Multiple children in array describe multiple dimensions of this array.
695 // For example, int[M][N] contains two children, M located in the first
696 // child, N located in the second child. But in STG multidimensional arrays
697 // are represented as chain of arrays: int[M][N] is array[M] of array[N] of
698 // int.
699 //
700 // We need to chain children as types together in reversed order.
701 // "referred_type_id" is updated every time to contain the top element in
702 // the chain. Rightmost chldren refers to the original "referred_type_id".
703 for (auto it = children.rbegin(); it != children.rend(); ++it) {
704 auto& child = *it;
705 // All subarrays except the first (last in the reversed order) are
706 // attached to the corresponding child. First subarray (last in the
707 // reversed order) is attached to the original entry itself.
708 auto& entry_to_attach = (it + 1 == children.rend()) ? entry : child;
709 // Update referred_type_id so next array in chain points there.
710 referred_type_id = AddProcessedNode<Array>(
711 entry_to_attach, GetNumberOfElements(child), referred_type_id);
712 }
713 }
714
ProcessEnum(Entry & entry)715 void ProcessEnum(Entry& entry) {
716 const auto type_name = GetNameOrEmpty(entry);
717 const auto full_name =
718 type_name.empty() ? type_name : scope_.name + type_name;
719
720 if (entry.GetFlag(DW_AT_declaration)) {
721 // It is expected to have only name and no children in declaration.
722 // However, it is not guaranteed and we should do something if we find an
723 // example.
724 CheckNoChildren(entry);
725 AddProcessedNode<Enumeration>(entry, full_name);
726 return;
727 }
728 const Id underlying_type_id =
729 GetReferredTypeId(MaybeGetReferredType(entry));
730 auto children = entry.GetChildren();
731 Enumeration::Enumerators enumerators;
732 enumerators.reserve(children.size());
733 for (auto& child : children) {
734 auto child_tag = child.GetTag();
735 switch (child_tag) {
736 case DW_TAG_enumerator: {
737 const std::string enumerator_name = GetName(child);
738 // TODO: detect signedness of underlying type and call
739 // an appropriate method.
740 std::optional<size_t> value_optional =
741 child.MaybeGetUnsignedConstant(DW_AT_const_value);
742 Check(value_optional.has_value()) << "Enumerator should have value";
743 // TODO: support both uint64_t and int64_t, depending on
744 // signedness of underlying type.
745 enumerators.emplace_back(enumerator_name,
746 static_cast<int64_t>(*value_optional));
747 break;
748 }
749 case DW_TAG_subprogram:
750 // STG does not support virtual methods for enums.
751 Check(child.MaybeGetUnsignedConstant(DW_AT_virtuality)
752 .value_or(DW_VIRTUALITY_none) == DW_VIRTUALITY_none)
753 << "Enums can not have virtual methods: " << EntryToString(child);
754 ProcessFunction(child);
755 break;
756 default:
757 Die() << "Unexpected tag for child of enum: " << Hex(child_tag)
758 << ", " << EntryToString(child);
759 }
760 }
761 if (!ShouldKeepDefinition(entry, type_name)) {
762 AddProcessedNode<Enumeration>(entry, full_name);
763 return;
764 }
765 const Id id = AddProcessedNode<Enumeration>(
766 entry, full_name, underlying_type_id, std::move(enumerators));
767 if (!full_name.empty()) {
768 AddNamedTypeNode(id);
769 }
770 }
771
772 struct VariantAndMembers {
773 std::optional<Id> discriminant;
774 std::vector<Id> members;
775 };
776
GetVariantAndMembers(Entry & entry)777 VariantAndMembers GetVariantAndMembers(Entry& entry) {
778 std::vector<Id> members;
779 std::optional<Id> discriminant = std::nullopt;
780 auto discriminant_entry = entry.MaybeGetReference(DW_AT_discr);
781 if (discriminant_entry.has_value()) {
782 discriminant = GetIdForEntry(*discriminant_entry);
783 ProcessMember(*discriminant_entry);
784 }
785
786 for (auto& child : entry.GetChildren()) {
787 auto child_tag = child.GetTag();
788 switch (child_tag) {
789 case DW_TAG_member: {
790 if (child.GetOffset() != discriminant_entry->GetOffset()) {
791 Die() << "Encountered rogue member for variant: "
792 << EntryToString(entry);
793 }
794 if (!child.GetFlag(DW_AT_artificial)) {
795 Die() << "Variant discriminant must be an artificial member: "
796 << EntryToString(child);
797 }
798 break;
799 }
800 case DW_TAG_variant:
801 members.push_back(GetIdForEntry(child));
802 ProcessVariantMember(child);
803 break;
804 default:
805 Die() << "Unexpected tag for child of variant: " << Hex(child_tag)
806 << ", " << EntryToString(child);
807 }
808 }
809 return VariantAndMembers{.discriminant = discriminant,
810 .members = std::move(members)};
811 }
812
813 struct NameWithContext {
814 std::optional<Dwarf_Off> specification;
815 std::optional<std::string> unscoped_name;
816 std::optional<std::string> scoped_name;
817 };
818
GetNameWithContext(Entry & entry)819 NameWithContext GetNameWithContext(Entry& entry) {
820 NameWithContext result;
821 // Leaf of specification tree is usually a declaration (of a function or a
822 // method). Then goes definition, which references declaration by
823 // DW_AT_specification. And on top we have instantiation, which references
824 // definition by DW_AT_abstract_origin. In the worst case we have:
825 // * instantiation
826 // >-DW_AT_abstract_origin-> definition
827 // >-DW_AT_specification-> declaration
828 //
829 // By using attribute integration we fold all information from definition to
830 // instantiation, flattening hierarchy:
831 // * instantiation + definition
832 // >-DW_AT_specification-> declaration
833 // NB: DW_AT_abstract_origin attribute is also visible, but it should be
834 // ignored, since we already used it during integration.
835 //
836 // We also need to support this case, when we don't have separate
837 // declaration:
838 // * instantiation +
839 // >-DW_AT_abstract_origin -> definition
840 //
841 // So the final algorithm is to get final DW_AT_specification through the
842 // whole chain, or use DW_AT_abstract_origin if there is no
843 // DW_AT_specification.
844 if (auto specification = entry.MaybeGetReference(DW_AT_specification)) {
845 result.specification = specification->GetOffset();
846 } else if (auto abstract_origin =
847 entry.MaybeGetReference(DW_AT_abstract_origin)) {
848 result.specification = abstract_origin->GetOffset();
849 }
850 result.unscoped_name = entry.MaybeGetDirectString(DW_AT_name);
851 if (!result.unscoped_name && !result.specification) {
852 // If there is no name and specification, then this entry is anonymous.
853 // Anonymous entries are modelled as the empty string and not nullopt.
854 // This allows us to fill and register scoped_name (also empty string) to
855 // be used in references.
856 result.unscoped_name = std::string();
857 }
858 if (result.unscoped_name) {
859 result.scoped_name = scope_.name + *result.unscoped_name;
860 scoped_names_.emplace_back(
861 entry.GetOffset(), *result.scoped_name);
862 }
863 return result;
864 }
865
GetScopedNameForSymbol(size_t symbol_idx,const NameWithContext & name)866 std::string GetScopedNameForSymbol(size_t symbol_idx,
867 const NameWithContext& name) {
868 // This method is designed to resolve this topology:
869 // A: specification=B
870 // B: name="foo"
871 // Any other topologies are rejected:
872 // * Name and specification in one DIE: checked right below.
873 // * Chain of specifications will result in symbol referencing another
874 // specification, which will not be in scoped_names_, because "name and
875 // specification in one DIE" is rejected.
876 if (name.scoped_name) {
877 if (name.specification) {
878 Die() << "Entry has name " << *name.scoped_name
879 << " and specification " << Hex(*name.specification);
880 }
881 return *name.scoped_name;
882 }
883 if (name.specification) {
884 unresolved_symbol_specifications_.emplace_back(*name.specification,
885 symbol_idx);
886 // Name will be filled in ResolveSymbolSpecifications
887 return {};
888 }
889 Die() << "Entry should have either name or specification";
890 }
891
ProcessVariable(Entry & entry)892 void ProcessVariable(Entry& entry) {
893 auto name_with_context = GetNameWithContext(entry);
894
895 auto referred_type = GetReferredType(entry);
896 const Id referred_type_id = GetIdForEntry(referred_type);
897
898 if (auto address = entry.MaybeGetAddress(DW_AT_location)) {
899 // Only external variables with address are useful for ABI monitoring
900 const auto new_symbol_idx = result_.symbols.size();
901 result_.symbols.push_back(Types::Symbol{
902 .scoped_name = GetScopedNameForSymbol(
903 new_symbol_idx, name_with_context),
904 .linkage_name = GetLinkageName(version_, entry),
905 .address = *address,
906 .type_id = referred_type_id});
907 }
908 }
909
ProcessFunction(Entry & entry)910 void ProcessFunction(Entry& entry) {
911 Subprogram subprogram = GetSubprogram(entry);
912 const Id id = AddProcessedNode<Function>(entry, std::move(subprogram.node));
913 if (subprogram.external && subprogram.address) {
914 // Only external functions with address are useful for ABI monitoring
915 const auto new_symbol_idx = result_.symbols.size();
916 result_.symbols.push_back(Types::Symbol{
917 .scoped_name = GetScopedNameForSymbol(
918 new_symbol_idx, subprogram.name_with_context),
919 .linkage_name = std::move(subprogram.linkage_name),
920 .address = *subprogram.address,
921 .type_id = id});
922 }
923 }
924
// Everything GetSubprogram extracts from a subprogram entry.
struct Subprogram {
  // Function node built from the return type Id and the parameter Ids.
  Function node;
  // Naming information produced by GetNameWithContext for the entry.
  NameWithContext name_with_context;
  // Result of GetLinkageName for the entry.
  std::string linkage_name;
  // Address read from DW_AT_low_pc, if the entry has one.
  std::optional<Address> address;
  // Value of the DW_AT_external flag.
  bool external;
};
932
GetSubprogram(Entry & entry)933 Subprogram GetSubprogram(Entry& entry) {
934 const Id return_type_id = GetReferredTypeId(MaybeGetReferredType(entry));
935
936 std::vector<Id> parameters;
937 for (auto& child : entry.GetChildren()) {
938 auto child_tag = child.GetTag();
939 switch (child_tag) {
940 case DW_TAG_formal_parameter:
941 parameters.push_back(GetReferredTypeId(GetReferredType(child)));
942 break;
943 case DW_TAG_unspecified_parameters:
944 // Note: C++ allows a single ... argument specification but C does
945 // not. However, "extern int foo();" (note lack of "void" in
946 // parameters) in C will produce the same DWARF as "extern int
947 // foo(...);" in C++.
948 CheckNoChildren(child);
949 parameters.push_back(variadic_id_);
950 break;
951 case DW_TAG_enumeration_type:
952 case DW_TAG_label:
953 case DW_TAG_lexical_block:
954 case DW_TAG_structure_type:
955 case DW_TAG_class_type:
956 case DW_TAG_union_type:
957 case DW_TAG_typedef:
958 case DW_TAG_const_type:
959 case DW_TAG_volatile_type:
960 case DW_TAG_restrict_type:
961 case DW_TAG_atomic_type:
962 case DW_TAG_array_type:
963 case DW_TAG_pointer_type:
964 case DW_TAG_reference_type:
965 case DW_TAG_rvalue_reference_type:
966 case DW_TAG_ptr_to_member_type:
967 case DW_TAG_unspecified_type:
968 case DW_TAG_inlined_subroutine:
969 case DW_TAG_subprogram:
970 case DW_TAG_variable:
971 case DW_TAG_call_site:
972 case DW_TAG_GNU_call_site:
973 // TODO: Do not leak local types outside this scope.
974 // TODO: It would be better to not process any
975 // information that is function local but there is a dangling
976 // reference Clang bug.
977 Process(child);
978 break;
979 case DW_TAG_imported_declaration:
980 case DW_TAG_imported_module:
981 // For now information there is useless for ABI monitoring, but we
982 // need to check that there is no missing information in descendants.
983 CheckNoChildren(child);
984 break;
985 case DW_TAG_template_type_parameter:
986 case DW_TAG_template_value_parameter:
987 case DW_TAG_GNU_template_template_param:
988 case DW_TAG_GNU_template_parameter_pack:
989 // We just skip these as neither GCC nor Clang seem to use them
990 // properly (resulting in no references to such DIEs).
991 break;
992 case DW_TAG_GNU_formal_parameter_pack:
993 // https://wiki.dwarfstd.org/C++0x_Variadic_templates.md
994 //
995 // As per this (rejected) proposal, GCC includes parameters as
996 // children of this DIE.
997 for (auto& child2 : child.GetChildren()) {
998 if (child2.GetTag() == DW_TAG_formal_parameter) {
999 parameters.push_back(GetReferredTypeId(GetReferredType(child2)));
1000 }
1001 }
1002 break;
1003 default:
1004 Die() << "Unexpected tag for child of function: " << Hex(child_tag)
1005 << ", " << EntryToString(child);
1006 }
1007 }
1008
1009 return Subprogram{.node = Function(return_type_id, parameters),
1010 .name_with_context = GetNameWithContext(entry),
1011 .linkage_name = GetLinkageName(version_, entry),
1012 .address = entry.MaybeGetAddress(DW_AT_low_pc),
1013 .external = entry.GetFlag(DW_AT_external)};
1014 }
1015
1016 // Allocate or get already allocated STG Id for Entry.
GetIdForEntry(Entry & entry)1017 Id GetIdForEntry(Entry& entry) {
1018 return maker_.Get(Hex(entry.GetOffset()));
1019 }
1020
1021 // Same as GetIdForEntry, but returns "void_id_" for "unspecified" references,
1022 // because it is normal for DWARF (5.2 Unspecified Type Entries).
GetReferredTypeId(std::optional<Entry> referred_type)1023 Id GetReferredTypeId(std::optional<Entry> referred_type) {
1024 return referred_type ? GetIdForEntry(*referred_type) : void_id_;
1025 }
1026
// Wrapper for GetIdForEntry to allow rvalues (temporaries), which cannot bind
// to its non-const Entry& parameter.
GetReferredTypeId(Entry referred_type)1028 Id GetReferredTypeId(Entry referred_type) {
1029 return GetIdForEntry(referred_type);
1030 }
1031
// Populate the Id allocated by GetIdForEntry with the processed Node.
1033 template <typename Node, typename... Args>
AddProcessedNode(Entry & entry,Args &&...args)1034 Id AddProcessedNode(Entry& entry, Args&&... args) {
1035 return maker_.Set<Node>(Hex(entry.GetOffset()),
1036 std::forward<Args>(args)...);
1037 }
1038
AddNamedTypeNode(Id id)1039 void AddNamedTypeNode(Id id) {
1040 if (scope_.named) {
1041 result_.named_type_ids.push_back(id);
1042 }
1043 }
1044
// Hands out and populates Ids keyed by hex-wrapped DWARF offsets (see
// GetIdForEntry and AddProcessedNode).
Maker<Hex<Dwarf_Off>> maker_;
// Id returned by GetReferredTypeId when there is no referred type.
Id void_id_;
// Id pushed as a parameter for DW_TAG_unspecified_parameters children.
Id variadic_id_;
// NOTE(review): presumably the endianness of the binary being processed, as
// passed to Process(); not used in the visible part of the class - confirm.
bool is_little_endian_binary_;
// NOTE(review): presumably the file filter passed to Process(); held by
// reference, so the caller's object has to outlive the Processor - confirm.
const std::unique_ptr<Filter>& file_filter_;
// Output being filled in: symbols and named_type_ids are appended to it.
Types& result_;
// (entry offset, scoped name) pairs recorded by GetNameWithContext.
std::vector<std::pair<Dwarf_Off, std::string>> scoped_names_;
// (specification offset, symbol index) pairs recorded by
// GetScopedNameForSymbol for symbols whose name is not known yet.
std::vector<std::pair<Dwarf_Off, size_t>> unresolved_symbol_specifications_;

// Current scope.
Scope scope_;
// Passed to GetLinkageName; NOTE(review): presumably the DWARF version of
// the compilation unit being processed - confirm.
int version_;
// NOTE(review): not used in the visible part of the class; presumably the
// file table of the current compilation unit - confirm.
dwarf::Files files_;
// NOTE(review): not used in the visible part of the class; presumably the
// source language (DW_LANG_*) of the current compilation unit - confirm.
uint64_t language_;
1059 };
1060
Process(Dwarf * dwarf,bool is_little_endian_binary,const std::unique_ptr<Filter> & file_filter,Graph & graph)1061 Types Process(Dwarf* dwarf, bool is_little_endian_binary,
1062 const std::unique_ptr<Filter>& file_filter, Graph& graph) {
1063 Types result;
1064
1065 if (dwarf == nullptr) {
1066 return result;
1067 }
1068
1069 const Id void_id = graph.Add<Special>(Special::Kind::VOID);
1070 const Id variadic_id = graph.Add<Special>(Special::Kind::VARIADIC);
1071 // TODO: Scope Processor to compilation units?
1072 Processor processor(graph, void_id, variadic_id, is_little_endian_binary,
1073 file_filter, result);
1074 for (auto& compilation_unit : GetCompilationUnits(*dwarf)) {
1075 // Could fetch top-level attributes like compiler here.
1076 processor.ProcessCompilationUnit(compilation_unit);
1077 }
1078 processor.ResolveSymbolSpecifications();
1079
1080 return result;
1081 }
1082
1083 } // namespace dwarf
1084 } // namespace stg
1085