1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- mode: C++ -*-
3 //
4 // Copyright 2020-2024 Google LLC
5 //
6 // Licensed under the Apache License v2.0 with LLVM Exceptions (the
7 // "License"); you may not use this file except in compliance with the
8 // License. You may obtain a copy of the License at
9 //
10 // https://llvm.org/LICENSE.txt
11 //
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 // Author: Maria Teguiani
19 // Author: Giuliano Procida
20 // Author: Ignes Simeonova
21 // Author: Aleksei Vetrov
22
23 #include "btf_reader.h"
24
25 #include <algorithm>
26 #include <cstddef>
27 #include <cstdint>
28 #include <cstring>
29 #include <map>
30 #include <optional>
31 #include <sstream>
32 #include <string>
33 #include <string_view>
34 #include <utility>
35 #include <vector>
36
37 #include <linux/btf.h>
38 #include "elf_dwarf_handle.h"
39 #include "elf_loader.h"
40 #include "error.h"
41 #include "graph.h"
42 #include "reader_options.h"
43
44 namespace stg {
45
46 namespace btf {
47
48 namespace {
49
50 // BTF Specification: https://www.kernel.org/doc/html/latest/bpf/btf.html
51 class Structs {
52 public:
53 explicit Structs(Graph& graph);
54 Id Process(std::string_view data);
55
56 private:
57 struct MemoryRange {
58 const char* start;
59 const char* limit;
60 bool Empty() const;
61 template <typename T> const T* Pull(size_t count = 1);
62 };
63
64 MemoryRange string_section_;
65
66 Maker<uint32_t> maker_;
67 std::optional<Id> void_;
68 std::optional<Id> variadic_;
69 std::map<std::string, Id> btf_symbols_;
70
71 Id ProcessAligned(std::string_view data);
72
73 Id GetVoid();
74 Id GetVariadic();
75 Id GetIdRaw(uint32_t btf_index);
76 Id GetId(uint32_t btf_index);
77 Id GetParameterId(uint32_t btf_index);
78 template <typename Node, typename... Args>
79 void Set(uint32_t id, Args&&... args);
80
81 Id BuildTypes(MemoryRange memory);
82 void BuildOneType(const btf_type* t, uint32_t btf_index,
83 MemoryRange& memory);
84 Id BuildSymbols();
85 std::vector<Id> BuildMembers(
86 bool kflag, const btf_member* members, size_t vlen);
87 Enumeration::Enumerators BuildEnums(
88 bool is_signed, const struct btf_enum* enums, size_t vlen);
89 Enumeration::Enumerators BuildEnums64(
90 bool is_signed, const struct btf_enum64* enums, size_t vlen);
91 std::vector<Id> BuildParams(const struct btf_param* params, size_t vlen);
92 Id BuildEnumUnderlyingType(size_t size, bool is_signed);
93 std::string GetName(uint32_t name_off);
94 };
95
Empty() const96 bool Structs::MemoryRange::Empty() const {
97 return start == limit;
98 }
99
100 template <typename T>
Pull(size_t count)101 const T* Structs::MemoryRange::Pull(size_t count) {
102 const char* saved = start;
103 start += sizeof(T) * count;
104 Check(start <= limit) << "type data extends past end of type section";
105 return reinterpret_cast<const T*>(saved);
106 }
107
Structs(Graph & graph)108 Structs::Structs(Graph& graph)
109 : maker_(graph) {}
110
111 // Get the index of the void type, creating one if needed.
GetVoid()112 Id Structs::GetVoid() {
113 if (!void_) {
114 void_ = {maker_.Add<Special>(Special::Kind::VOID)};
115 }
116 return *void_;
117 }
118
119 // Get the index of the variadic parameter type, creating one if needed.
GetVariadic()120 Id Structs::GetVariadic() {
121 if (!variadic_) {
122 variadic_ = {maker_.Add<Special>(Special::Kind::VARIADIC)};
123 }
124 return *variadic_;
125 }
126
127 // Map BTF type index to node ID.
GetIdRaw(uint32_t btf_index)128 Id Structs::GetIdRaw(uint32_t btf_index) {
129 return maker_.Get(btf_index);
130 }
131
132 // Translate BTF type index to node ID, for non-parameters.
GetId(uint32_t btf_index)133 Id Structs::GetId(uint32_t btf_index) {
134 return btf_index ? GetIdRaw(btf_index) : GetVoid();
135 }
136
137 // Translate BTF type index to node ID, for parameters.
GetParameterId(uint32_t btf_index)138 Id Structs::GetParameterId(uint32_t btf_index) {
139 return btf_index ? GetIdRaw(btf_index) : GetVariadic();
140 }
141
142 // For a BTF type index, populate the node with the corresponding ID.
143 template <typename Node, typename... Args>
Set(uint32_t id,Args &&...args)144 void Structs::Set(uint32_t id, Args&&... args) {
145 maker_.Set<Node>(id, std::forward<Args>(args)...);
146 }
147
Process(std::string_view btf_data)148 Id Structs::Process(std::string_view btf_data) {
149 // TODO: Remove this hack once the upstream binaries have proper
150 // alignment.
151 //
152 // Copy the data to aligned heap-allocated memory, if needed.
153 return reinterpret_cast<uintptr_t>(btf_data.data()) % alignof(btf_header) > 0
154 ? ProcessAligned(std::string(btf_data))
155 : ProcessAligned(btf_data);
156 }
157
ProcessAligned(std::string_view btf_data)158 Id Structs::ProcessAligned(std::string_view btf_data) {
159 Check(sizeof(btf_header) <= btf_data.size())
160 << "BTF section too small for header";
161 const btf_header* header =
162 reinterpret_cast<const btf_header*>(btf_data.data());
163 Check(header->magic == 0xEB9F) << "Magic field must be 0xEB9F for BTF";
164
165 const char* header_limit = btf_data.begin() + header->hdr_len;
166 const char* type_start = header_limit + header->type_off;
167 const char* type_limit = type_start + header->type_len;
168 const char* string_start = header_limit + header->str_off;
169 const char* string_limit = string_start + header->str_len;
170
171 Check(btf_data.begin() + sizeof(btf_header) <= header_limit)
172 << "header exceeds length";
173 Check(header_limit <= type_start) << "type section overlaps header";
174 Check(type_start <= type_limit) << "type section ill-formed";
175 Check(reinterpret_cast<uintptr_t>(type_start) % alignof(btf_type) == 0)
176 << "misaligned type section";
177 Check(type_limit <= string_start)
178 << "string section does not follow type section";
179 Check(string_start <= string_limit) << "string section ill-formed";
180 Check(string_limit <= btf_data.end())
181 << "string section extends beyond end of BTF data";
182
183 const MemoryRange type_section{type_start, type_limit};
184 string_section_ = MemoryRange{string_start, string_limit};
185 return BuildTypes(type_section);
186 }
187
188 // vlen: vector length, the number of struct/union members
BuildMembers(bool kflag,const btf_member * members,size_t vlen)189 std::vector<Id> Structs::BuildMembers(
190 bool kflag, const btf_member* members, size_t vlen) {
191 std::vector<Id> result;
192 for (size_t i = 0; i < vlen; ++i) {
193 const auto& raw_member = members[i];
194 const auto name = GetName(raw_member.name_off);
195 const auto raw_offset = raw_member.offset;
196 const auto offset = kflag ? BTF_MEMBER_BIT_OFFSET(raw_offset) : raw_offset;
197 const auto bitfield_size = kflag ? BTF_MEMBER_BITFIELD_SIZE(raw_offset) : 0;
198 result.push_back(
199 maker_.Add<Member>(name, GetId(raw_member.type),
200 static_cast<uint64_t>(offset), bitfield_size));
201 }
202 return result;
203 }
204
205 // vlen: vector length, the number of enum values
BuildEnums(bool is_signed,const struct btf_enum * enums,size_t vlen)206 std::vector<std::pair<std::string, int64_t>> Structs::BuildEnums(
207 bool is_signed, const struct btf_enum* enums, size_t vlen) {
208 std::vector<std::pair<std::string, int64_t>> result;
209 for (size_t i = 0; i < vlen; ++i) {
210 const auto name = GetName(enums[i].name_off);
211 const uint32_t unsigned_value = enums[i].val;
212 if (is_signed) {
213 const int32_t signed_value = unsigned_value;
214 result.emplace_back(name, static_cast<int64_t>(signed_value));
215 } else {
216 result.emplace_back(name, static_cast<int64_t>(unsigned_value));
217 }
218 }
219 return result;
220 }
221
BuildEnums64(bool is_signed,const struct btf_enum64 * enums,size_t vlen)222 std::vector<std::pair<std::string, int64_t>> Structs::BuildEnums64(
223 bool is_signed, const struct btf_enum64* enums, size_t vlen) {
224 std::vector<std::pair<std::string, int64_t>> result;
225 for (size_t i = 0; i < vlen; ++i) {
226 const auto name = GetName(enums[i].name_off);
227 const uint32_t low = enums[i].val_lo32;
228 const uint32_t high = enums[i].val_hi32;
229 const uint64_t unsigned_value = (static_cast<uint64_t>(high) << 32) | low;
230 if (is_signed) {
231 const int64_t signed_value = unsigned_value;
232 result.emplace_back(name, signed_value);
233 } else {
234 // TODO: very large unsigned values are stored as negative numbers
235 result.emplace_back(name, static_cast<int64_t>(unsigned_value));
236 }
237 }
238 return result;
239 }
240
241 // vlen: vector length, the number of parameters
BuildParams(const struct btf_param * params,size_t vlen)242 std::vector<Id> Structs::BuildParams(const struct btf_param* params,
243 size_t vlen) {
244 std::vector<Id> result;
245 result.reserve(vlen);
246 for (size_t i = 0; i < vlen; ++i) {
247 const auto name = GetName(params[i].name_off);
248 const auto type = params[i].type;
249 result.push_back(GetParameterId(type));
250 }
251 return result;
252 }
253
BuildEnumUnderlyingType(size_t size,bool is_signed)254 Id Structs::BuildEnumUnderlyingType(size_t size, bool is_signed) {
255 std::ostringstream os;
256 os << (is_signed ? "enum-underlying-signed-" : "enum-underlying-unsigned-")
257 << (8 * size);
258 const auto encoding = is_signed ? Primitive::Encoding::SIGNED_INTEGER
259 : Primitive::Encoding::UNSIGNED_INTEGER;
260 return maker_.Add<Primitive>(os.str(), encoding, size);
261 }
262
BuildTypes(MemoryRange memory)263 Id Structs::BuildTypes(MemoryRange memory) {
264 // Alas, BTF overloads type id 0 to mean both void (for everything but
265 // function parameters) and variadic (for function parameters). We determine
266 // which is intended and create void and variadic types on demand.
267
268 // The type section is parsed sequentially and each type's index is its id.
269 uint32_t btf_index = 1;
270 while (!memory.Empty()) {
271 const auto* t = memory.Pull<struct btf_type>();
272 BuildOneType(t, btf_index, memory);
273 ++btf_index;
274 }
275
276 return BuildSymbols();
277 }
278
BuildOneType(const btf_type * t,uint32_t btf_index,MemoryRange & memory)279 void Structs::BuildOneType(const btf_type* t, uint32_t btf_index,
280 MemoryRange& memory) {
281 const auto kind = BTF_INFO_KIND(t->info);
282 const auto vlen = BTF_INFO_VLEN(t->info);
283 Check(kind < NR_BTF_KINDS) << "Unknown BTF kind: " << static_cast<int>(kind);
284
285 switch (kind) {
286 case BTF_KIND_INT: {
287 const auto info = *memory.Pull<uint32_t>();
288 const auto name = GetName(t->name_off);
289 const auto raw_encoding = BTF_INT_ENCODING(info);
290 const auto offset = BTF_INT_OFFSET(info);
291 const auto bits = BTF_INT_BITS(info);
292 const auto is_bool = raw_encoding & BTF_INT_BOOL;
293 const auto is_signed = raw_encoding & BTF_INT_SIGNED;
294 const auto is_char = raw_encoding & BTF_INT_CHAR;
295 Primitive::Encoding encoding =
296 is_bool ? Primitive::Encoding::BOOLEAN
297 : is_char ? is_signed ? Primitive::Encoding::SIGNED_CHARACTER
298 : Primitive::Encoding::UNSIGNED_CHARACTER
299 : is_signed ? Primitive::Encoding::SIGNED_INTEGER
300 : Primitive::Encoding::UNSIGNED_INTEGER;
301 if (offset) {
302 Die() << "BTF INT non-zero offset " << offset;
303 }
304 if (bits != 8 * t->size) {
305 Die() << "BTF INT bits != 8 * size";
306 }
307 Set<Primitive>(btf_index, name, encoding, t->size);
308 break;
309 }
310 case BTF_KIND_FLOAT: {
311 const auto name = GetName(t->name_off);
312 const auto encoding = Primitive::Encoding::REAL_NUMBER;
313 Set<Primitive>(btf_index, name, encoding, t->size);
314 break;
315 }
316 case BTF_KIND_PTR: {
317 Set<PointerReference>(btf_index, PointerReference::Kind::POINTER,
318 GetId(t->type));
319 break;
320 }
321 case BTF_KIND_TYPEDEF: {
322 const auto name = GetName(t->name_off);
323 Set<Typedef>(btf_index, name, GetId(t->type));
324 break;
325 }
326 case BTF_KIND_VOLATILE:
327 case BTF_KIND_CONST:
328 case BTF_KIND_RESTRICT: {
329 const auto qualifier = kind == BTF_KIND_CONST
330 ? Qualifier::CONST
331 : kind == BTF_KIND_VOLATILE
332 ? Qualifier::VOLATILE
333 : Qualifier::RESTRICT;
334 Set<Qualified>(btf_index, qualifier, GetId(t->type));
335 break;
336 }
337 case BTF_KIND_ARRAY: {
338 const auto* array = memory.Pull<struct btf_array>();
339 Set<Array>(btf_index, array->nelems, GetId(array->type));
340 break;
341 }
342 case BTF_KIND_STRUCT:
343 case BTF_KIND_UNION: {
344 const auto struct_union_kind = kind == BTF_KIND_STRUCT
345 ? StructUnion::Kind::STRUCT
346 : StructUnion::Kind::UNION;
347 const auto name = GetName(t->name_off);
348 const bool kflag = BTF_INFO_KFLAG(t->info);
349 const auto* btf_members = memory.Pull<struct btf_member>(vlen);
350 const auto members = BuildMembers(kflag, btf_members, vlen);
351 Set<StructUnion>(btf_index, struct_union_kind, name, t->size,
352 std::vector<Id>(), std::vector<Id>(), members);
353 break;
354 }
355 case BTF_KIND_ENUM: {
356 const auto name = GetName(t->name_off);
357 const bool is_signed = BTF_INFO_KFLAG(t->info);
358 const auto* enums = memory.Pull<struct btf_enum>(vlen);
359 const auto enumerators = BuildEnums(is_signed, enums, vlen);
360 // BTF only considers structs and unions as forward-declared types, and
361 // does not include forward-declared enums. They are treated as
362 // BTF_KIND_ENUMs with vlen set to zero.
363 if (vlen) {
364 // create a synthetic underlying type
365 const Id underlying = BuildEnumUnderlyingType(t->size, is_signed);
366 Set<Enumeration>(btf_index, name, underlying, enumerators);
367 } else {
368 // BTF actually provides size (4), but it's meaningless.
369 Set<Enumeration>(btf_index, name);
370 }
371 break;
372 }
373 case BTF_KIND_ENUM64: {
374 const auto name = GetName(t->name_off);
375 const bool is_signed = BTF_INFO_KFLAG(t->info);
376 const auto* enums = memory.Pull<struct btf_enum64>(vlen);
377 const auto enumerators = BuildEnums64(is_signed, enums, vlen);
378 // create a synthetic underlying type
379 const Id underlying = BuildEnumUnderlyingType(t->size, is_signed);
380 Set<Enumeration>(btf_index, name, underlying, enumerators);
381 break;
382 }
383 case BTF_KIND_FWD: {
384 const auto name = GetName(t->name_off);
385 const auto struct_union_kind = BTF_INFO_KFLAG(t->info)
386 ? StructUnion::Kind::UNION
387 : StructUnion::Kind::STRUCT;
388 Set<StructUnion>(btf_index, struct_union_kind, name);
389 break;
390 }
391 case BTF_KIND_FUNC: {
392 const auto name = GetName(t->name_off);
393 // TODO: map linkage (vlen) to symbol properties
394 Set<ElfSymbol>(btf_index, name, std::nullopt, true,
395 ElfSymbol::SymbolType::FUNCTION,
396 ElfSymbol::Binding::GLOBAL,
397 ElfSymbol::Visibility::DEFAULT,
398 std::nullopt,
399 std::nullopt,
400 GetId(t->type),
401 std::nullopt);
402 const bool inserted =
403 btf_symbols_.insert({name, GetIdRaw(btf_index)}).second;
404 Check(inserted) << "duplicate symbol " << name;
405 break;
406 }
407 case BTF_KIND_FUNC_PROTO: {
408 const auto* params = memory.Pull<struct btf_param>(vlen);
409 const auto parameters = BuildParams(params, vlen);
410 Set<Function>(btf_index, GetId(t->type), parameters);
411 break;
412 }
413 case BTF_KIND_VAR: {
414 // NOTE: global variables are not yet emitted by pahole -J
415 const auto* variable = memory.Pull<struct btf_var>();
416 const auto name = GetName(t->name_off);
417 // TODO: map variable->linkage to symbol properties
418 (void) variable;
419 Set<ElfSymbol>(btf_index, name, std::nullopt, true,
420 ElfSymbol::SymbolType::OBJECT,
421 ElfSymbol::Binding::GLOBAL,
422 ElfSymbol::Visibility::DEFAULT,
423 std::nullopt,
424 std::nullopt,
425 GetId(t->type),
426 std::nullopt);
427 const bool inserted =
428 btf_symbols_.insert({name, GetIdRaw(btf_index)}).second;
429 Check(inserted) << "duplicate symbol " << name;
430 break;
431 }
432 case BTF_KIND_DATASEC: {
433 // Just skip BTF DATASEC entries. They partially duplicate ELF symbol
434 // table information, if they exist at all.
435 memory.Pull<struct btf_var_secinfo>(vlen);
436 break;
437 }
438 default: {
439 Die() << "Unhandled BTF kind: " << static_cast<int>(kind);
440 break;
441 }
442 }
443 }
444
GetName(uint32_t name_off)445 std::string Structs::GetName(uint32_t name_off) {
446 const char* name_begin = string_section_.start + name_off;
447 const char* const limit = string_section_.limit;
448 Check(name_begin < limit) << "name offset exceeds string section length";
449 const char* name_end = std::find(name_begin, limit, '\0');
450 Check(name_end < limit) << "name continues past the string section limit";
451 return {name_begin, static_cast<size_t>(name_end - name_begin)};
452 }
453
BuildSymbols()454 Id Structs::BuildSymbols() {
455 return maker_.Add<Interface>(btf_symbols_);
456 }
457
458 } // namespace
459
ReadSection(Graph & graph,std::string_view data)460 Id ReadSection(Graph& graph, std::string_view data) {
461 return Structs(graph).Process(data);
462 }
463
ReadFile(Graph & graph,const std::string & path,ReadOptions)464 Id ReadFile(Graph& graph, const std::string& path, ReadOptions) {
465 ElfDwarfHandle handle(path);
466 const elf::ElfLoader loader(handle.GetElf());
467 return ReadSection(graph, loader.GetSectionRawData(".BTF"));
468 }
469
470 } // namespace btf
471
472 } // namespace stg
473