xref: /aosp_15_r20/external/stg/btf_reader.cc (revision 9e3b08ae94a55201065475453d799e8b1378bea6)
1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- mode: C++ -*-
3 //
4 // Copyright 2020-2024 Google LLC
5 //
6 // Licensed under the Apache License v2.0 with LLVM Exceptions (the
7 // "License"); you may not use this file except in compliance with the
8 // License.  You may obtain a copy of the License at
9 //
10 //     https://llvm.org/LICENSE.txt
11 //
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 // Author: Maria Teguiani
19 // Author: Giuliano Procida
20 // Author: Ignes Simeonova
21 // Author: Aleksei Vetrov
22 
23 #include "btf_reader.h"
24 
25 #include <algorithm>
26 #include <cstddef>
27 #include <cstdint>
28 #include <cstring>
29 #include <map>
30 #include <optional>
31 #include <sstream>
32 #include <string>
33 #include <string_view>
34 #include <utility>
35 #include <vector>
36 
37 #include <linux/btf.h>
38 #include "elf_dwarf_handle.h"
39 #include "elf_loader.h"
40 #include "error.h"
41 #include "graph.h"
42 #include "reader_options.h"
43 
44 namespace stg {
45 
46 namespace btf {
47 
48 namespace {
49 
50 // BTF Specification: https://www.kernel.org/doc/html/latest/bpf/btf.html
51 class Structs {
52  public:
53   explicit Structs(Graph& graph);
54   Id Process(std::string_view data);
55 
56  private:
57   struct MemoryRange {
58     const char* start;
59     const char* limit;
60     bool Empty() const;
61     template <typename T> const T* Pull(size_t count = 1);
62   };
63 
64   MemoryRange string_section_;
65 
66   Maker<uint32_t> maker_;
67   std::optional<Id> void_;
68   std::optional<Id> variadic_;
69   std::map<std::string, Id> btf_symbols_;
70 
71   Id ProcessAligned(std::string_view data);
72 
73   Id GetVoid();
74   Id GetVariadic();
75   Id GetIdRaw(uint32_t btf_index);
76   Id GetId(uint32_t btf_index);
77   Id GetParameterId(uint32_t btf_index);
78   template <typename Node, typename... Args>
79   void Set(uint32_t id, Args&&... args);
80 
81   Id BuildTypes(MemoryRange memory);
82   void BuildOneType(const btf_type* t, uint32_t btf_index,
83                     MemoryRange& memory);
84   Id BuildSymbols();
85   std::vector<Id> BuildMembers(
86       bool kflag, const btf_member* members, size_t vlen);
87   Enumeration::Enumerators BuildEnums(
88       bool is_signed, const struct btf_enum* enums, size_t vlen);
89   Enumeration::Enumerators BuildEnums64(
90       bool is_signed, const struct btf_enum64* enums, size_t vlen);
91   std::vector<Id> BuildParams(const struct btf_param* params, size_t vlen);
92   Id BuildEnumUnderlyingType(size_t size, bool is_signed);
93   std::string GetName(uint32_t name_off);
94 };
95 
Empty() const96 bool Structs::MemoryRange::Empty() const {
97   return start == limit;
98 }
99 
100 template <typename T>
Pull(size_t count)101 const T* Structs::MemoryRange::Pull(size_t count) {
102   const char* saved = start;
103   start += sizeof(T) * count;
104   Check(start <= limit) << "type data extends past end of type section";
105   return reinterpret_cast<const T*>(saved);
106 }
107 
Structs(Graph & graph)108 Structs::Structs(Graph& graph)
109     : maker_(graph) {}
110 
111 // Get the index of the void type, creating one if needed.
GetVoid()112 Id Structs::GetVoid() {
113   if (!void_) {
114     void_ = {maker_.Add<Special>(Special::Kind::VOID)};
115   }
116   return *void_;
117 }
118 
119 // Get the index of the variadic parameter type, creating one if needed.
GetVariadic()120 Id Structs::GetVariadic() {
121   if (!variadic_) {
122     variadic_ = {maker_.Add<Special>(Special::Kind::VARIADIC)};
123   }
124   return *variadic_;
125 }
126 
127 // Map BTF type index to node ID.
GetIdRaw(uint32_t btf_index)128 Id Structs::GetIdRaw(uint32_t btf_index) {
129   return maker_.Get(btf_index);
130 }
131 
132 // Translate BTF type index to node ID, for non-parameters.
GetId(uint32_t btf_index)133 Id Structs::GetId(uint32_t btf_index) {
134   return btf_index ? GetIdRaw(btf_index) : GetVoid();
135 }
136 
137 // Translate BTF type index to node ID, for parameters.
GetParameterId(uint32_t btf_index)138 Id Structs::GetParameterId(uint32_t btf_index) {
139   return btf_index ? GetIdRaw(btf_index) : GetVariadic();
140 }
141 
142 // For a BTF type index, populate the node with the corresponding ID.
143 template <typename Node, typename... Args>
Set(uint32_t id,Args &&...args)144 void Structs::Set(uint32_t id, Args&&... args) {
145   maker_.Set<Node>(id, std::forward<Args>(args)...);
146 }
147 
Process(std::string_view btf_data)148 Id Structs::Process(std::string_view btf_data) {
149   // TODO: Remove this hack once the upstream binaries have proper
150   // alignment.
151   //
152   // Copy the data to aligned heap-allocated memory, if needed.
153   return reinterpret_cast<uintptr_t>(btf_data.data()) % alignof(btf_header) > 0
154       ? ProcessAligned(std::string(btf_data))
155       : ProcessAligned(btf_data);
156 }
157 
ProcessAligned(std::string_view btf_data)158 Id Structs::ProcessAligned(std::string_view btf_data) {
159   Check(sizeof(btf_header) <= btf_data.size())
160       << "BTF section too small for header";
161   const btf_header* header =
162       reinterpret_cast<const btf_header*>(btf_data.data());
163   Check(header->magic == 0xEB9F) << "Magic field must be 0xEB9F for BTF";
164 
165   const char* header_limit = btf_data.begin() + header->hdr_len;
166   const char* type_start = header_limit + header->type_off;
167   const char* type_limit = type_start + header->type_len;
168   const char* string_start = header_limit + header->str_off;
169   const char* string_limit = string_start + header->str_len;
170 
171   Check(btf_data.begin() + sizeof(btf_header) <= header_limit)
172       << "header exceeds length";
173   Check(header_limit <= type_start) << "type section overlaps header";
174   Check(type_start <= type_limit) << "type section ill-formed";
175   Check(reinterpret_cast<uintptr_t>(type_start) % alignof(btf_type) == 0)
176       << "misaligned type section";
177   Check(type_limit <= string_start)
178       << "string section does not follow type section";
179   Check(string_start <= string_limit) << "string section ill-formed";
180   Check(string_limit <= btf_data.end())
181       << "string section extends beyond end of BTF data";
182 
183   const MemoryRange type_section{type_start, type_limit};
184   string_section_ = MemoryRange{string_start, string_limit};
185   return BuildTypes(type_section);
186 }
187 
188 // vlen: vector length, the number of struct/union members
BuildMembers(bool kflag,const btf_member * members,size_t vlen)189 std::vector<Id> Structs::BuildMembers(
190     bool kflag, const btf_member* members, size_t vlen) {
191   std::vector<Id> result;
192   for (size_t i = 0; i < vlen; ++i) {
193     const auto& raw_member = members[i];
194     const auto name = GetName(raw_member.name_off);
195     const auto raw_offset = raw_member.offset;
196     const auto offset = kflag ? BTF_MEMBER_BIT_OFFSET(raw_offset) : raw_offset;
197     const auto bitfield_size = kflag ? BTF_MEMBER_BITFIELD_SIZE(raw_offset) : 0;
198     result.push_back(
199         maker_.Add<Member>(name, GetId(raw_member.type),
200                            static_cast<uint64_t>(offset), bitfield_size));
201   }
202   return result;
203 }
204 
205 // vlen: vector length, the number of enum values
BuildEnums(bool is_signed,const struct btf_enum * enums,size_t vlen)206 std::vector<std::pair<std::string, int64_t>> Structs::BuildEnums(
207     bool is_signed, const struct btf_enum* enums, size_t vlen) {
208   std::vector<std::pair<std::string, int64_t>> result;
209   for (size_t i = 0; i < vlen; ++i) {
210     const auto name = GetName(enums[i].name_off);
211     const uint32_t unsigned_value = enums[i].val;
212     if (is_signed) {
213       const int32_t signed_value = unsigned_value;
214       result.emplace_back(name, static_cast<int64_t>(signed_value));
215     } else {
216       result.emplace_back(name, static_cast<int64_t>(unsigned_value));
217     }
218   }
219   return result;
220 }
221 
BuildEnums64(bool is_signed,const struct btf_enum64 * enums,size_t vlen)222 std::vector<std::pair<std::string, int64_t>> Structs::BuildEnums64(
223     bool is_signed, const struct btf_enum64* enums, size_t vlen) {
224   std::vector<std::pair<std::string, int64_t>> result;
225   for (size_t i = 0; i < vlen; ++i) {
226     const auto name = GetName(enums[i].name_off);
227     const uint32_t low = enums[i].val_lo32;
228     const uint32_t high = enums[i].val_hi32;
229     const uint64_t unsigned_value = (static_cast<uint64_t>(high) << 32) | low;
230     if (is_signed) {
231       const int64_t signed_value = unsigned_value;
232       result.emplace_back(name, signed_value);
233     } else {
234       // TODO: very large unsigned values are stored as negative numbers
235       result.emplace_back(name, static_cast<int64_t>(unsigned_value));
236     }
237   }
238   return result;
239 }
240 
241 // vlen: vector length, the number of parameters
BuildParams(const struct btf_param * params,size_t vlen)242 std::vector<Id> Structs::BuildParams(const struct btf_param* params,
243                                      size_t vlen) {
244   std::vector<Id> result;
245   result.reserve(vlen);
246   for (size_t i = 0; i < vlen; ++i) {
247     const auto name = GetName(params[i].name_off);
248     const auto type = params[i].type;
249     result.push_back(GetParameterId(type));
250   }
251   return result;
252 }
253 
BuildEnumUnderlyingType(size_t size,bool is_signed)254 Id Structs::BuildEnumUnderlyingType(size_t size, bool is_signed) {
255   std::ostringstream os;
256   os << (is_signed ? "enum-underlying-signed-" : "enum-underlying-unsigned-")
257      << (8 * size);
258   const auto encoding = is_signed ? Primitive::Encoding::SIGNED_INTEGER
259                                   : Primitive::Encoding::UNSIGNED_INTEGER;
260   return maker_.Add<Primitive>(os.str(), encoding, size);
261 }
262 
BuildTypes(MemoryRange memory)263 Id Structs::BuildTypes(MemoryRange memory) {
264   // Alas, BTF overloads type id 0 to mean both void (for everything but
265   // function parameters) and variadic (for function parameters). We determine
266   // which is intended and create void and variadic types on demand.
267 
268   // The type section is parsed sequentially and each type's index is its id.
269   uint32_t btf_index = 1;
270   while (!memory.Empty()) {
271     const auto* t = memory.Pull<struct btf_type>();
272     BuildOneType(t, btf_index, memory);
273     ++btf_index;
274   }
275 
276   return BuildSymbols();
277 }
278 
BuildOneType(const btf_type * t,uint32_t btf_index,MemoryRange & memory)279 void Structs::BuildOneType(const btf_type* t, uint32_t btf_index,
280                            MemoryRange& memory) {
281   const auto kind = BTF_INFO_KIND(t->info);
282   const auto vlen = BTF_INFO_VLEN(t->info);
283   Check(kind < NR_BTF_KINDS) << "Unknown BTF kind: " << static_cast<int>(kind);
284 
285   switch (kind) {
286     case BTF_KIND_INT: {
287       const auto info = *memory.Pull<uint32_t>();
288       const auto name = GetName(t->name_off);
289       const auto raw_encoding = BTF_INT_ENCODING(info);
290       const auto offset = BTF_INT_OFFSET(info);
291       const auto bits = BTF_INT_BITS(info);
292       const auto is_bool = raw_encoding & BTF_INT_BOOL;
293       const auto is_signed = raw_encoding & BTF_INT_SIGNED;
294       const auto is_char = raw_encoding & BTF_INT_CHAR;
295       Primitive::Encoding encoding =
296           is_bool ? Primitive::Encoding::BOOLEAN
297                 : is_char ? is_signed ? Primitive::Encoding::SIGNED_CHARACTER
298                                       : Primitive::Encoding::UNSIGNED_CHARACTER
299                           : is_signed ? Primitive::Encoding::SIGNED_INTEGER
300                                       : Primitive::Encoding::UNSIGNED_INTEGER;
301       if (offset) {
302         Die() << "BTF INT non-zero offset " << offset;
303       }
304       if (bits != 8 * t->size) {
305         Die() << "BTF INT bits != 8 * size";
306       }
307       Set<Primitive>(btf_index, name, encoding, t->size);
308       break;
309     }
310     case BTF_KIND_FLOAT: {
311       const auto name = GetName(t->name_off);
312       const auto encoding = Primitive::Encoding::REAL_NUMBER;
313       Set<Primitive>(btf_index, name, encoding, t->size);
314       break;
315     }
316     case BTF_KIND_PTR: {
317       Set<PointerReference>(btf_index, PointerReference::Kind::POINTER,
318                             GetId(t->type));
319       break;
320     }
321     case BTF_KIND_TYPEDEF: {
322       const auto name = GetName(t->name_off);
323       Set<Typedef>(btf_index, name, GetId(t->type));
324       break;
325     }
326     case BTF_KIND_VOLATILE:
327     case BTF_KIND_CONST:
328     case BTF_KIND_RESTRICT: {
329       const auto qualifier = kind == BTF_KIND_CONST
330                              ? Qualifier::CONST
331                              : kind == BTF_KIND_VOLATILE
332                              ? Qualifier::VOLATILE
333                              : Qualifier::RESTRICT;
334       Set<Qualified>(btf_index, qualifier, GetId(t->type));
335       break;
336     }
337     case BTF_KIND_ARRAY: {
338       const auto* array = memory.Pull<struct btf_array>();
339       Set<Array>(btf_index, array->nelems, GetId(array->type));
340       break;
341     }
342     case BTF_KIND_STRUCT:
343     case BTF_KIND_UNION: {
344       const auto struct_union_kind = kind == BTF_KIND_STRUCT
345                                      ? StructUnion::Kind::STRUCT
346                                      : StructUnion::Kind::UNION;
347       const auto name = GetName(t->name_off);
348       const bool kflag = BTF_INFO_KFLAG(t->info);
349       const auto* btf_members = memory.Pull<struct btf_member>(vlen);
350       const auto members = BuildMembers(kflag, btf_members, vlen);
351       Set<StructUnion>(btf_index, struct_union_kind, name, t->size,
352                        std::vector<Id>(), std::vector<Id>(), members);
353       break;
354     }
355     case BTF_KIND_ENUM: {
356       const auto name = GetName(t->name_off);
357       const bool is_signed = BTF_INFO_KFLAG(t->info);
358       const auto* enums = memory.Pull<struct btf_enum>(vlen);
359       const auto enumerators = BuildEnums(is_signed, enums, vlen);
360       // BTF only considers structs and unions as forward-declared types, and
361       // does not include forward-declared enums. They are treated as
362       // BTF_KIND_ENUMs with vlen set to zero.
363       if (vlen) {
364         // create a synthetic underlying type
365         const Id underlying = BuildEnumUnderlyingType(t->size, is_signed);
366         Set<Enumeration>(btf_index, name, underlying, enumerators);
367       } else {
368         // BTF actually provides size (4), but it's meaningless.
369         Set<Enumeration>(btf_index, name);
370       }
371       break;
372     }
373     case BTF_KIND_ENUM64: {
374       const auto name = GetName(t->name_off);
375       const bool is_signed = BTF_INFO_KFLAG(t->info);
376       const auto* enums = memory.Pull<struct btf_enum64>(vlen);
377       const auto enumerators = BuildEnums64(is_signed, enums, vlen);
378       // create a synthetic underlying type
379       const Id underlying = BuildEnumUnderlyingType(t->size, is_signed);
380       Set<Enumeration>(btf_index, name, underlying, enumerators);
381       break;
382     }
383     case BTF_KIND_FWD: {
384       const auto name = GetName(t->name_off);
385       const auto struct_union_kind = BTF_INFO_KFLAG(t->info)
386                                      ? StructUnion::Kind::UNION
387                                      : StructUnion::Kind::STRUCT;
388       Set<StructUnion>(btf_index, struct_union_kind, name);
389       break;
390     }
391     case BTF_KIND_FUNC: {
392       const auto name = GetName(t->name_off);
393       // TODO: map linkage (vlen) to symbol properties
394       Set<ElfSymbol>(btf_index, name, std::nullopt, true,
395                      ElfSymbol::SymbolType::FUNCTION,
396                      ElfSymbol::Binding::GLOBAL,
397                      ElfSymbol::Visibility::DEFAULT,
398                      std::nullopt,
399                      std::nullopt,
400                      GetId(t->type),
401                      std::nullopt);
402       const bool inserted =
403           btf_symbols_.insert({name, GetIdRaw(btf_index)}).second;
404       Check(inserted) << "duplicate symbol " << name;
405       break;
406     }
407     case BTF_KIND_FUNC_PROTO: {
408       const auto* params = memory.Pull<struct btf_param>(vlen);
409       const auto parameters = BuildParams(params, vlen);
410       Set<Function>(btf_index, GetId(t->type), parameters);
411       break;
412     }
413     case BTF_KIND_VAR: {
414       // NOTE: global variables are not yet emitted by pahole -J
415       const auto* variable = memory.Pull<struct btf_var>();
416       const auto name = GetName(t->name_off);
417       // TODO: map variable->linkage to symbol properties
418       (void) variable;
419       Set<ElfSymbol>(btf_index, name, std::nullopt, true,
420                      ElfSymbol::SymbolType::OBJECT,
421                      ElfSymbol::Binding::GLOBAL,
422                      ElfSymbol::Visibility::DEFAULT,
423                      std::nullopt,
424                      std::nullopt,
425                      GetId(t->type),
426                      std::nullopt);
427       const bool inserted =
428           btf_symbols_.insert({name, GetIdRaw(btf_index)}).second;
429       Check(inserted) << "duplicate symbol " << name;
430       break;
431     }
432     case BTF_KIND_DATASEC: {
433       // Just skip BTF DATASEC entries. They partially duplicate ELF symbol
434       // table information, if they exist at all.
435       memory.Pull<struct btf_var_secinfo>(vlen);
436       break;
437     }
438     default: {
439       Die() << "Unhandled BTF kind: " << static_cast<int>(kind);
440       break;
441     }
442   }
443 }
444 
GetName(uint32_t name_off)445 std::string Structs::GetName(uint32_t name_off) {
446   const char* name_begin = string_section_.start + name_off;
447   const char* const limit = string_section_.limit;
448   Check(name_begin < limit) << "name offset exceeds string section length";
449   const char* name_end = std::find(name_begin, limit, '\0');
450   Check(name_end < limit) << "name continues past the string section limit";
451   return {name_begin, static_cast<size_t>(name_end - name_begin)};
452 }
453 
BuildSymbols()454 Id Structs::BuildSymbols() {
455   return maker_.Add<Interface>(btf_symbols_);
456 }
457 
458 }  // namespace
459 
ReadSection(Graph & graph,std::string_view data)460 Id ReadSection(Graph& graph, std::string_view data) {
461   return Structs(graph).Process(data);
462 }
463 
ReadFile(Graph & graph,const std::string & path,ReadOptions)464 Id ReadFile(Graph& graph, const std::string& path, ReadOptions) {
465   ElfDwarfHandle handle(path);
466   const elf::ElfLoader loader(handle.GetElf());
467   return ReadSection(graph, loader.GetSectionRawData(".BTF"));
468 }
469 
470 }  // namespace btf
471 
472 }  // namespace stg
473