1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- mode: C++ -*-
3 //
4 // Copyright 2022 Google LLC
5 //
6 // Licensed under the Apache License v2.0 with LLVM Exceptions (the
7 // "License"); you may not use this file except in compliance with the
8 // License. You may obtain a copy of the License at
9 //
10 // https://llvm.org/LICENSE.txt
11 //
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 // Author: Aleksei Vetrov
19
20 #include "dwarf_wrappers.h"
21
22 #include <dwarf.h>
23 #include <elfutils/libdw.h>
24
25 #include <cstddef>
26 #include <cstdint>
27 #include <optional>
28 #include <ostream>
29 #include <string>
30 #include <utility>
31 #include <vector>
32
33 #include "error.h"
34 #include "hex.h"
35
36 namespace stg {
37 namespace dwarf {
38
operator <<(std::ostream & os,const Address & address)39 std::ostream& operator<<(std::ostream& os, const Address& address) {
40 switch (address.kind) {
41 case Address::Kind::ADDRESS:
42 return os << Hex(address.value);
43 case Address::Kind::TLS:
44 return os << "TLS:" << Hex(address.value);
45 }
46 }
47
48 namespace {
49
// Integer return codes that libdw calls in this file are compared against:
// kReturnOk is tested for success, kReturnNoEntry for "nothing (more) to
// return" when iterating over units, children and siblings.
constexpr int kReturnOk{0};
constexpr int kReturnNoEntry{1};
52
GetAttribute(Dwarf_Die * die,uint32_t attribute)53 std::optional<Dwarf_Attribute> GetAttribute(Dwarf_Die* die,
54 uint32_t attribute) {
55 // Create an optional with default-initialized value already inside
56 std::optional<Dwarf_Attribute> result(std::in_place);
57 // "integrate" automatically resolves DW_AT_abstract_origin and
58 // DW_AT_specification references, fetching the attribute from the linked DIE.
59 //
60 // libdw has infinite loop protection, as it stops after 16 dereferences.
61 // TODO: don't use dwarf_attr_integrate by default
62 if (!dwarf_attr_integrate(die, attribute, &result.value())) {
63 result.reset();
64 }
65 return result;
66 }
67
68 // Get the attribute directly from DIE without following DW_AT_specification and
69 // DW_AT_abstract_origin references.
GetDirectAttribute(Dwarf_Die * die,uint32_t attribute)70 std::optional<Dwarf_Attribute> GetDirectAttribute(Dwarf_Die* die,
71 uint32_t attribute) {
72 // Create an optional with default-initialized value already inside
73 std::optional<Dwarf_Attribute> result(std::in_place);
74 if (!dwarf_attr(die, attribute, &result.value())) {
75 result.reset();
76 }
77 return result;
78 }
79
MaybeGetUnsignedOperand(const Dwarf_Op & operand)80 std::optional<uint64_t> MaybeGetUnsignedOperand(const Dwarf_Op& operand) {
81 switch (operand.atom) {
82 case DW_OP_addr:
83 case DW_OP_const1u:
84 case DW_OP_const2u:
85 case DW_OP_const4u:
86 case DW_OP_const8u:
87 case DW_OP_constu:
88 return operand.number;
89 case DW_OP_const1s:
90 case DW_OP_const2s:
91 case DW_OP_const4s:
92 case DW_OP_const8s:
93 case DW_OP_consts:
94 if (static_cast<int64_t>(operand.number) < 0) {
95 // Atom is not an unsigned constant
96 return std::nullopt;
97 }
98 return operand.number;
99 case DW_OP_lit0...DW_OP_lit31:
100 return operand.atom - DW_OP_lit0;
101 default:
102 return std::nullopt;
103 }
104 }
105
106 struct Expression {
operator []stg::dwarf::__anon4d8866ff0111::Expression107 const Dwarf_Op& operator[](size_t i) const {
108 return atoms[i];
109 }
110
111 Dwarf_Op* atoms = nullptr;
112 size_t length = 0;
113 };
114
MaybeGetExpression(Dwarf_Attribute & attribute)115 std::optional<Expression> MaybeGetExpression(Dwarf_Attribute& attribute) {
116 Expression result;
117
118 Check(dwarf_getlocation(&attribute, &result.atoms, &result.length) ==
119 kReturnOk) << "dwarf_getlocation returned error";
120 // If no location attribute is present or has an empty location description,
121 // the variable is present in the source but not in the object code.
122 // So zero length expression is equivalent of no location attribute.
123 if (result.length == 0) {
124 return std::nullopt;
125 }
126 Check(result.atoms != nullptr)
127 << "dwarf_getlocation returned non-empty expression with NULL atoms";
128 return result;
129 }
130
131 } // namespace
132
GetCompilationUnits(Dwarf & dwarf)133 std::vector<CompilationUnit> GetCompilationUnits(Dwarf& dwarf) {
134 std::vector<CompilationUnit> result;
135 Dwarf_Off offset = 0;
136 while (true) {
137 Dwarf_Off next_offset;
138 size_t header_size = 0;
139 Dwarf_Half version = 0;
140 const int return_code =
141 dwarf_next_unit(&dwarf, offset, &next_offset, &header_size, &version,
142 nullptr, nullptr, nullptr, nullptr, nullptr);
143 Check(return_code == kReturnOk || return_code == kReturnNoEntry)
144 << "dwarf_next_unit returned error";
145 if (return_code == kReturnNoEntry) {
146 break;
147 }
148 result.push_back({version, {}});
149 Check(dwarf_offdie(&dwarf, offset + header_size,
150 &result.back().entry.die) != nullptr)
151 << "dwarf_offdie returned error";
152
153 offset = next_offset;
154 }
155 return result;
156 }
157
GetChildren()158 std::vector<Entry> Entry::GetChildren() {
159 Entry child;
160 int return_code = dwarf_child(&die, &child.die);
161 Check(return_code == kReturnOk || return_code == kReturnNoEntry)
162 << "dwarf_child returned error";
163 std::vector<Entry> result;
164 while (return_code == kReturnOk) {
165 result.push_back(child);
166 return_code = dwarf_siblingof(&child.die, &child.die);
167 Check(return_code == kReturnOk || return_code == kReturnNoEntry)
168 << "dwarf_siblingof returned error";
169 }
170 return result;
171 }
172
GetTag()173 int Entry::GetTag() {
174 return dwarf_tag(&die);
175 }
176
GetOffset()177 Dwarf_Off Entry::GetOffset() {
178 return dwarf_dieoffset(&die);
179 }
180
MaybeGetString(uint32_t attribute)181 std::optional<std::string> Entry::MaybeGetString(uint32_t attribute) {
182 std::optional<std::string> result;
183 auto dwarf_attribute = GetAttribute(&die, attribute);
184 if (!dwarf_attribute) {
185 return result;
186 }
187
188 const char* value = dwarf_formstring(&dwarf_attribute.value());
189 Check(value != nullptr) << "dwarf_formstring returned error";
190 result.emplace(value);
191 return result;
192 }
193
MaybeGetDirectString(uint32_t attribute)194 std::optional<std::string> Entry::MaybeGetDirectString(uint32_t attribute) {
195 std::optional<std::string> result;
196 auto dwarf_attribute = GetDirectAttribute(&die, attribute);
197 if (!dwarf_attribute) {
198 return result;
199 }
200
201 const char* value = dwarf_formstring(&dwarf_attribute.value());
202 Check(value != nullptr) << "dwarf_formstring returned error";
203 result.emplace(value);
204 return result;
205 }
206
MaybeGetUnsignedConstant(uint32_t attribute)207 std::optional<uint64_t> Entry::MaybeGetUnsignedConstant(uint32_t attribute) {
208 auto dwarf_attribute = GetAttribute(&die, attribute);
209 if (!dwarf_attribute) {
210 return {};
211 }
212
213 uint64_t value;
214 if (dwarf_formudata(&dwarf_attribute.value(), &value) != kReturnOk) {
215 Die() << "dwarf_formudata returned error";
216 }
217 return value;
218 }
219
MustGetUnsignedConstant(uint32_t attribute)220 uint64_t Entry::MustGetUnsignedConstant(uint32_t attribute) {
221 auto maybe_constant = MaybeGetUnsignedConstant(attribute);
222 if (!maybe_constant) {
223 Die() << "DWARF entry <" << Hex(GetOffset()) << "> with tag " << GetTag()
224 << " is missing attribute " << Hex(attribute);
225 }
226 return maybe_constant.value();
227 }
228
GetFlag(uint32_t attribute)229 bool Entry::GetFlag(uint32_t attribute) {
230 bool result = false;
231 auto dwarf_attribute = (attribute == DW_AT_declaration)
232 ? GetDirectAttribute(&die, attribute)
233 : GetAttribute(&die, attribute);
234 if (!dwarf_attribute) {
235 return result;
236 }
237
238 Check(dwarf_formflag(&dwarf_attribute.value(), &result) == kReturnOk)
239 << "dwarf_formflag returned error";
240 return result;
241 }
242
MaybeGetReference(uint32_t attribute)243 std::optional<Entry> Entry::MaybeGetReference(uint32_t attribute) {
244 std::optional<Entry> result;
245 auto dwarf_attribute = GetAttribute(&die, attribute);
246 if (!dwarf_attribute) {
247 return result;
248 }
249
250 result.emplace();
251 Check(dwarf_formref_die(&dwarf_attribute.value(), &result->die))
252 << "dwarf_formref_die returned error";
253 return result;
254 }
255
256 namespace {
257
GetAddressFromLocation(Dwarf_Attribute & attribute)258 std::optional<Address> GetAddressFromLocation(Dwarf_Attribute& attribute) {
259 const auto expression_opt = MaybeGetExpression(attribute);
260 if (!expression_opt) {
261 return {};
262 }
263 const Expression& expression = *expression_opt;
264
265 Dwarf_Attribute result_attribute;
266 if (dwarf_getlocation_attr(&attribute, expression.atoms, &result_attribute) ==
267 kReturnOk) {
268 uint64_t address;
269 Check(dwarf_formaddr(&result_attribute, &address) == kReturnOk)
270 << "dwarf_formaddr returned error";
271 return Address{Address::Kind::ADDRESS, address};
272 }
273
274 if (expression.length == 1 && expression[0].atom == DW_OP_addr) {
275 // DW_OP_addr is unsupported by dwarf_getlocation_attr, so we need to
276 // manually extract the address from expression.
277 return Address{Address::Kind::ADDRESS, expression[0].number};
278 }
279 if (expression.length == 2 && expression[0].atom == DW_OP_addr &&
280 expression[1].atom == DW_OP_plus_uconst) {
281 // A rather odd case seen from Clang.
282 return Address{Address::Kind::ADDRESS,
283 expression[0].number + expression[1].number};
284 }
285
286 // TLS operation has different encodings in Clang and GCC:
287 // * Clang 14 uses DW_OP_GNU_push_tls_address
288 // * GCC 12 uses DW_OP_form_tls_address
289 if (expression.length == 2 &&
290 (expression[1].atom == DW_OP_GNU_push_tls_address ||
291 expression[1].atom == DW_OP_form_tls_address)) {
292 // TLS symbols address may be incorrect because of unsupported
293 // relocations. Resetting it to zero the same way as it is done in
294 // elf::Reader::MaybeAddTypeInfo.
295 // TODO: match TLS variables by address
296 return Address{Address::Kind::TLS, 0};
297 }
298
299 Die() << "Unsupported data location expression";
300 }
301
302 } // namespace
303
MaybeGetAddress(uint32_t attribute)304 std::optional<Address> Entry::MaybeGetAddress(uint32_t attribute) {
305 auto dwarf_attribute = GetAttribute(&die, attribute);
306 if (!dwarf_attribute) {
307 return {};
308 }
309 if (attribute == DW_AT_location) {
310 return GetAddressFromLocation(*dwarf_attribute);
311 }
312
313 uint64_t address;
314 Check(dwarf_formaddr(&dwarf_attribute.value(), &address) == kReturnOk)
315 << "dwarf_formaddr returned error";
316 return Address{Address::Kind::ADDRESS, address};
317 }
318
MaybeGetMemberByteOffset()319 std::optional<uint64_t> Entry::MaybeGetMemberByteOffset() {
320 auto attribute = GetAttribute(&die, DW_AT_data_member_location);
321 if (!attribute) {
322 return {};
323 }
324
325 uint64_t offset;
326 // Try to interpret attribute as an unsigned integer constant
327 if (dwarf_formudata(&attribute.value(), &offset) == kReturnOk) {
328 return offset;
329 }
330
331 // Parse location expression
332 const auto expression_opt = MaybeGetExpression(attribute.value());
333 if (!expression_opt) {
334 return {};
335 }
336 const Expression& expression = *expression_opt;
337
338 // Parse virtual base classes offset, which looks like this:
339 // [0] = DW_OP_dup
340 // [1] = DW_OP_deref
341 // [2] = constant operand
342 // [3] = DW_OP_minus
343 // [4] = DW_OP_deref
344 // [5] = DW_OP_plus
345 // This form is not in the standard, but hardcoded in compilers:
346 // * https://github.com/llvm/llvm-project/blob/release/17.x/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp#L1611
347 // * https://github.com/gcc-mirror/gcc/blob/releases/gcc-13/gcc/dwarf2out.cc#L20029
348 if (expression.length == 6 &&
349 expression[0].atom == DW_OP_dup &&
350 expression[1].atom == DW_OP_deref &&
351 expression[3].atom == DW_OP_minus &&
352 expression[4].atom == DW_OP_deref &&
353 expression[5].atom == DW_OP_plus) {
354 const auto byte_offset = MaybeGetUnsignedOperand(expression[2]);
355 if (byte_offset) {
356 return byte_offset;
357 }
358 }
359
360 Die() << "Unsupported member offset expression, " << Hex(GetOffset());
361 }
362
MaybeGetVtableOffset()363 std::optional<uint64_t> Entry::MaybeGetVtableOffset() {
364 auto attribute = GetAttribute(&die, DW_AT_vtable_elem_location);
365 if (!attribute) {
366 return {};
367 }
368
369 // Parse location expression
370 const auto expression_opt = MaybeGetExpression(attribute.value());
371 if (!expression_opt) {
372 return {};
373 }
374 const Expression& expression = *expression_opt;
375
376 // We expect compilers to produce expression with one constant operand
377 if (expression.length == 1) {
378 const auto offset = MaybeGetUnsignedOperand(expression[0]);
379 if (offset) {
380 return offset;
381 }
382 }
383
384 Die() << "Unsupported vtable offset expression, " << Hex(GetOffset());
385 }
386
MaybeGetCount()387 std::optional<uint64_t> Entry::MaybeGetCount() {
388 auto lower_bound_attribute = MaybeGetUnsignedConstant(DW_AT_lower_bound);
389 if (lower_bound_attribute && *lower_bound_attribute != 0) {
390 Die() << "Non-zero DW_AT_lower_bound is not supported";
391 }
392 auto upper_bound_attribute = GetAttribute(&die, DW_AT_upper_bound);
393 auto count_attribute = GetAttribute(&die, DW_AT_count);
394 if (!upper_bound_attribute && !count_attribute) {
395 return {};
396 }
397 if (upper_bound_attribute && count_attribute) {
398 Die() << "Both DW_AT_upper_bound and DW_AT_count given";
399 }
400 Dwarf_Attribute dwarf_attribute;
401 uint64_t addend;
402 if (upper_bound_attribute) {
403 dwarf_attribute = *upper_bound_attribute;
404 addend = 1;
405 } else {
406 dwarf_attribute = *count_attribute;
407 addend = 0;
408 }
409
410 uint64_t value;
411 if (dwarf_formudata(&dwarf_attribute, &value) == kReturnOk) {
412 return value + addend;
413 }
414
415 // Don't fail if attribute is not a constant and treat this as no count
416 // provided. This can happen if array has variable length.
417 // TODO: implement clean solution for separating "not a
418 // constant" errors from other errors.
419 return {};
420 }
421
Files(Entry & compilation_unit)422 Files::Files(Entry& compilation_unit) {
423 if (dwarf_getsrcfiles(&compilation_unit.die, &files_, &files_count_) !=
424 kReturnOk) {
425 Die() << "No source file information in DWARF";
426 }
427 }
428
MaybeGetFile(Entry & entry,uint32_t attribute) const429 std::optional<std::string> Files::MaybeGetFile(Entry& entry,
430 uint32_t attribute) const {
431 auto file_index = entry.MaybeGetUnsignedConstant(attribute);
432 if (!file_index) {
433 return std::nullopt;
434 }
435 Check(files_ != nullptr) << "dwarf::Files was not initialised";
436 if (*file_index >= files_count_) {
437 Die() << "File index is greater than or equal files count (" << *file_index
438 << " >= " << files_count_ << ")";
439 }
440 const char* result = dwarf_filesrc(files_, *file_index, nullptr, nullptr);
441 Check(result != nullptr) << "dwarf_filesrc returned error";
442 return result;
443 }
444
445 } // namespace dwarf
446 } // namespace stg
447