xref: /aosp_15_r20/external/stg/elf_loader.cc (revision 9e3b08ae94a55201065475453d799e8b1378bea6)
1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- mode: C++ -*-
3 //
4 // Copyright 2020-2022 Google LLC
5 //
6 // Licensed under the Apache License v2.0 with LLVM Exceptions (the
7 // "License"); you may not use this file except in compliance with the
8 // License.  You may obtain a copy of the License at
9 //
10 //     https://llvm.org/LICENSE.txt
11 //
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 // Author: Maria Teguiani
19 // Author: Giuliano Procida
20 // Author: Aleksei Vetrov
21 
22 #include "elf_loader.h"
23 
24 #include <elf.h>
25 #include <gelf.h>
26 #include <libelf.h>
27 
28 #include <cstddef>
29 #include <cstdint>
30 #include <cstring>
31 #include <functional>
32 #include <limits>
33 #include <ostream>
34 #include <string>
35 #include <string_view>
36 #include <vector>
37 
38 #include "error.h"
39 #include "graph.h"
40 
41 namespace stg {
42 namespace elf {
43 
44 namespace {
45 
ParseSymbolType(unsigned char symbol_type)46 SymbolTableEntry::SymbolType ParseSymbolType(unsigned char symbol_type) {
47   switch (symbol_type) {
48     case STT_NOTYPE:
49       return SymbolTableEntry::SymbolType::NOTYPE;
50     case STT_OBJECT:
51       return SymbolTableEntry::SymbolType::OBJECT;
52     case STT_FUNC:
53       return SymbolTableEntry::SymbolType::FUNCTION;
54     case STT_SECTION:
55       return SymbolTableEntry::SymbolType::SECTION;
56     case STT_FILE:
57       return SymbolTableEntry::SymbolType::FILE;
58     case STT_COMMON:
59       return SymbolTableEntry::SymbolType::COMMON;
60     case STT_TLS:
61       return SymbolTableEntry::SymbolType::TLS;
62     case STT_GNU_IFUNC:
63       return SymbolTableEntry::SymbolType::GNU_IFUNC;
64     default:
65       Die() << "Unknown ELF symbol type: " << symbol_type;
66   }
67 }
68 
ParseSymbolBinding(unsigned char binding)69 SymbolTableEntry::Binding ParseSymbolBinding(unsigned char binding) {
70   switch (binding) {
71     case STB_LOCAL:
72       return SymbolTableEntry::Binding::LOCAL;
73     case STB_GLOBAL:
74       return SymbolTableEntry::Binding::GLOBAL;
75     case STB_WEAK:
76       return SymbolTableEntry::Binding::WEAK;
77     case STB_GNU_UNIQUE:
78       return SymbolTableEntry::Binding::GNU_UNIQUE;
79     default:
80       Die() << "Unknown ELF symbol binding: " << binding;
81   }
82 }
83 
ParseSymbolVisibility(unsigned char visibility)84 SymbolTableEntry::Visibility ParseSymbolVisibility(unsigned char visibility) {
85   switch (visibility) {
86     case STV_DEFAULT:
87       return SymbolTableEntry::Visibility::DEFAULT;
88     case STV_INTERNAL:
89       return SymbolTableEntry::Visibility::INTERNAL;
90     case STV_HIDDEN:
91       return SymbolTableEntry::Visibility::HIDDEN;
92     case STV_PROTECTED:
93       return SymbolTableEntry::Visibility::PROTECTED;
94     default:
95       Die() << "Unknown ELF symbol visibility: " << visibility;
96   }
97 }
98 
ParseSymbolValueType(Elf64_Section section_index)99 SymbolTableEntry::ValueType ParseSymbolValueType(Elf64_Section section_index) {
100   switch (section_index) {
101     case SHN_UNDEF:
102       return SymbolTableEntry::ValueType::UNDEFINED;
103     case SHN_ABS:
104       return SymbolTableEntry::ValueType::ABSOLUTE;
105     case SHN_COMMON:
106       return SymbolTableEntry::ValueType::COMMON;
107     default:
108       return SymbolTableEntry::ValueType::RELATIVE_TO_SECTION;
109   }
110 }
111 
// Returns a human-readable description of an ELF header type (e_type).
//
// The parameter is a 16-bit Elf64_Half, matching the width of e_type, so
// that values above 0xff are not truncated into one of the known types.
// Unrecognised values yield "unknown (type = N)".
std::string ElfHeaderTypeToString(Elf64_Half elf_header_type) {
  switch (elf_header_type) {
    case ET_NONE:
      return "none";
    case ET_REL:
      return "relocatable";
    case ET_EXEC:
      return "executable";
    case ET_DYN:
      return "shared object";
    case ET_CORE:
      return "coredump";
    default:
      return "unknown (type = " + std::to_string(elf_header_type) + ')';
  }
}
128 
// Returns a human-readable name for the section types this file works with;
// any other type yields "unknown (type = N)".
std::string ElfSectionTypeToString(Elf64_Word elf_section_type) {
  struct KnownType {
    Elf64_Word type;
    const char* label;
  };
  static constexpr KnownType kKnownTypes[] = {
      {SHT_SYMTAB, "symtab"},
      {SHT_DYNSYM, "dynsym"},
      {SHT_GNU_verdef, "GNU_verdef"},
      {SHT_GNU_verneed, "GNU_verneed"},
      {SHT_GNU_versym, "GNU_versym"},
  };
  for (const KnownType& known : kKnownTypes) {
    if (known.type == elf_section_type) {
      return known.label;
    }
  }
  return "unknown (type = " + std::to_string(elf_section_type) + ')';
}
145 
GetMachine(Elf * elf)146 GElf_Half GetMachine(Elf* elf) {
147   GElf_Ehdr header;
148   Check(gelf_getehdr(elf, &header) != nullptr) << "could not get ELF header";
149   return header.e_machine;
150 }
151 
AdjustAddress(GElf_Half machine,SymbolTableEntry & entry)152 void AdjustAddress(GElf_Half machine, SymbolTableEntry& entry) {
153   if (machine == EM_ARM) {
154     if (entry.symbol_type == SymbolTableEntry::SymbolType::FUNCTION
155         || entry.symbol_type == SymbolTableEntry::SymbolType::GNU_IFUNC) {
156       // Clear bit zero of ARM32 addresses as per "ELF for the Arm Architecture"
157       // section 5.5.3.  https://static.docs.arm.com/ihi0044/g/aaelf32.pdf
158       entry.value &= ~1;
159     }
160   } else if (machine == EM_AARCH64) {
161     // Copy bit 55 over bits 56 to 63 which may be tag information.
162     entry.value = entry.value & (1ULL << 55)
163                   ? entry.value | (0xffULL << 56)
164                   : entry.value & ~(0xffULL << 56);
165   }
166 }
167 
GetSectionsIf(Elf * elf,const std::function<bool (const GElf_Shdr &)> & predicate)168 std::vector<Elf_Scn*> GetSectionsIf(
169     Elf* elf, const std::function<bool(const GElf_Shdr&)>& predicate) {
170   std::vector<Elf_Scn*> result;
171   Elf_Scn* section = nullptr;
172   GElf_Shdr header;
173   while ((section = elf_nextscn(elf, section)) != nullptr) {
174     Check(gelf_getshdr(section, &header) != nullptr)
175         << "could not get ELF section header";
176     if (predicate(header)) {
177       result.push_back(section);
178     }
179   }
180   return result;
181 }
182 
GetSectionsByName(Elf * elf,const std::string & name)183 std::vector<Elf_Scn*> GetSectionsByName(Elf* elf, const std::string& name) {
184   size_t shdr_strtab_index;
185   Check(elf_getshdrstrndx(elf, &shdr_strtab_index) == 0)
186       << "could not get ELF section header string table index";
187   return GetSectionsIf(elf, [&](const GElf_Shdr& header) {
188     const auto* section_name =
189         elf_strptr(elf, shdr_strtab_index, header.sh_name);
190     return section_name != nullptr && section_name == name;
191   });
192 }
193 
MaybeGetSectionByName(Elf * elf,const std::string & name)194 Elf_Scn* MaybeGetSectionByName(Elf* elf, const std::string& name) {
195   const auto sections = GetSectionsByName(elf, name);
196   if (sections.empty()) {
197     return nullptr;
198   }
199   Check(sections.size() == 1)
200       << "multiple sections found with name '" << name << "'";
201   return sections[0];
202 }
203 
GetSectionByName(Elf * elf,const std::string & name)204 Elf_Scn* GetSectionByName(Elf* elf, const std::string& name) {
205   Elf_Scn* section = MaybeGetSectionByName(elf, name);
206   Check(section != nullptr) << "no section found with name '" << name << "'";
207   return section;
208 }
209 
MaybeGetSectionByType(Elf * elf,Elf64_Word type)210 Elf_Scn* MaybeGetSectionByType(Elf* elf, Elf64_Word type) {
211   auto sections = GetSectionsIf(
212       elf, [&](const GElf_Shdr& header) { return header.sh_type == type; });
213   if (sections.empty()) {
214     return nullptr;
215   }
216   Check(sections.size() == 1) << "multiple sections found with type " << type;
217   return sections[0];
218 }
219 
GetSectionByIndex(Elf * elf,size_t index)220 Elf_Scn* GetSectionByIndex(Elf* elf, size_t index) {
221   Elf_Scn* section = elf_getscn(elf, index);
222   Check(section != nullptr) << "no section found with index " << index;
223   return section;
224 }
225 
226 struct SectionInfo {
227   GElf_Shdr header;
228   Elf_Data* data;
229 };
230 
GetSectionInfo(Elf_Scn * section)231 SectionInfo GetSectionInfo(Elf_Scn* section) {
232   const size_t index = elf_ndxscn(section);
233   GElf_Shdr section_header;
234   Check(gelf_getshdr(section, &section_header) != nullptr)
235       << "failed to read section (index = " << index << ") header";
236   Elf_Data* data = elf_getdata(section, nullptr);
237   Check(data != nullptr) << "section (index = " << index << ") data is invalid";
238   return {section_header, data};
239 }
240 
GetNumberOfEntries(const GElf_Shdr & section_header)241 size_t GetNumberOfEntries(const GElf_Shdr& section_header) {
242   Check(section_header.sh_entsize != 0)
243       << "zero table entity size is unexpected for section "
244       << ElfSectionTypeToString(section_header.sh_type);
245   return section_header.sh_size / section_header.sh_entsize;
246 }
247 
GetRawData(Elf_Scn * section,const char * name)248 std::string_view GetRawData(Elf_Scn* section, const char* name) {
249   Elf_Data* data = elf_rawdata(section, nullptr);
250   Check(data != nullptr) << "elf_rawdata failed on section " << name;
251   return {static_cast<char*>(data->d_buf), data->d_size};
252 }
253 
GetString(Elf * elf,uint32_t section,size_t offset)254 std::string_view GetString(Elf* elf, uint32_t section, size_t offset) {
255   const auto name = elf_strptr(elf, section, offset);
256 
257   Check(name != nullptr) << "string was not found (section: " << section
258                          << ", offset: " << offset << ")";
259   return name;
260 }
261 
GetSymbolTableSection(Elf * elf,bool is_linux_kernel_binary)262 Elf_Scn* GetSymbolTableSection(Elf* elf, bool is_linux_kernel_binary) {
263   GElf_Ehdr elf_header;
264   Check(gelf_getehdr(elf, &elf_header) != nullptr)
265       << "could not get ELF header";
266 
267   Elf_Scn* symtab = MaybeGetSectionByType(elf, SHT_SYMTAB);
268   Elf_Scn* dynsym = MaybeGetSectionByType(elf, SHT_DYNSYM);
269   if (symtab != nullptr && dynsym != nullptr) {
270     // Relocatable ELF binaries, Linux kernel and modules have their
271     // exported symbols in .symtab, all other ELF types have their
272     // exported symbols in .dynsym.
273     if (elf_header.e_type == ET_REL || is_linux_kernel_binary) {
274       return symtab;
275     }
276     if (elf_header.e_type == ET_DYN || elf_header.e_type == ET_EXEC) {
277       return dynsym;
278     }
279     Die() << "unsupported ELF type: '"
280           << ElfHeaderTypeToString(elf_header.e_type) << "'";
281   } else if (symtab != nullptr) {
282     return symtab;
283   } else if (dynsym != nullptr) {
284     return dynsym;
285   } else {
286     Die() << "no ELF symbol table found";
287   }
288 }
289 
290 
// Suffix that IsCFISymbolName looks for at the end of a symbol name.
constexpr std::string_view kCFISuffix = ".cfi";

// Returns true if `name` ends with kCFISuffix.
bool IsCFISymbolName(std::string_view name) {
  if (name.size() < kCFISuffix.size()) {
    return false;
  }
  const std::string_view tail = name.substr(name.size() - kCFISuffix.size());
  return tail == kCFISuffix;
}
296 
297 }  // namespace
298 
UnwrapCFISymbolName(std::string_view cfi_name)299 std::string_view UnwrapCFISymbolName(std::string_view cfi_name) {
300   Check(IsCFISymbolName(cfi_name))
301       << "CFI symbol " << cfi_name << " doesn't end with " << kCFISuffix;
302   return cfi_name.substr(0, cfi_name.size() - kCFISuffix.size());
303 }
304 
305 namespace {
306 
GetSymbols(Elf * elf,Elf_Scn * symbol_table_section,bool cfi)307 std::vector<SymbolTableEntry> GetSymbols(
308     Elf* elf, Elf_Scn* symbol_table_section, bool cfi) {
309   const auto machine = GetMachine(elf);
310   const auto [symbol_table_header, symbol_table_data] =
311       GetSectionInfo(symbol_table_section);
312   const size_t number_of_symbols = GetNumberOfEntries(symbol_table_header);
313 
314   std::vector<SymbolTableEntry> result;
315   result.reserve(number_of_symbols);
316 
317   // GElf uses int for indexes in symbol table, prevent int overflow.
318   Check(number_of_symbols <= std::numeric_limits<int>::max())
319       << "number of symbols exceeds INT_MAX";
320   for (size_t i = 0; i < number_of_symbols; ++i) {
321     GElf_Sym symbol;
322     Check(gelf_getsym(symbol_table_data, static_cast<int>(i), &symbol) !=
323           nullptr)
324         << "symbol (i = " << i << ") was not found";
325 
326     const auto name =
327         GetString(elf, symbol_table_header.sh_link, symbol.st_name);
328     if (cfi != IsCFISymbolName(name)) {
329       continue;
330     }
331     SymbolTableEntry entry{
332         .name = name,
333         .value = symbol.st_value,
334         .size = symbol.st_size,
335         .symbol_type = ParseSymbolType(GELF_ST_TYPE(symbol.st_info)),
336         .binding = ParseSymbolBinding(GELF_ST_BIND(symbol.st_info)),
337         .visibility =
338             ParseSymbolVisibility(GELF_ST_VISIBILITY(symbol.st_other)),
339         .section_index = symbol.st_shndx,
340         .value_type = ParseSymbolValueType(symbol.st_shndx),
341     };
342     AdjustAddress(machine, entry);
343     result.push_back(entry);
344   }
345 
346   return result;
347 }
348 
IsLinuxKernelBinary(Elf * elf)349 bool IsLinuxKernelBinary(Elf* elf) {
350   // The Linux kernel itself has many specific sections that are sufficient to
351   // classify a binary as kernel binary if present, `__ksymtab_strings` is one
352   // of them. It is present if a kernel binary (vmlinux or a module) exports
353   // symbols via the EXPORT_SYMBOL_* macros and it contains symbol names and
354   // namespaces which form part of the ABI.
355   //
356   // Kernel modules might not present a `__ksymtab_strings` section if they do
357   // not export symbols themselves via the ksymtab. Yet they can be identified
358   // by the presence of the `.modinfo` section. Since that is somewhat a generic
359   // name, also check for the presence of `.gnu.linkonce.this_module` to get
360   // solid signal as both of those sections are present in kernel modules.
361   return MaybeGetSectionByName(elf, "__ksymtab_strings") != nullptr ||
362          (MaybeGetSectionByName(elf, ".modinfo") != nullptr &&
363           MaybeGetSectionByName(elf, ".gnu.linkonce.this_module") != nullptr);
364 }
365 
IsRelocatable(Elf * elf)366 bool IsRelocatable(Elf* elf) {
367   GElf_Ehdr elf_header;
368   Check(gelf_getehdr(elf, &elf_header) != nullptr)
369       << "could not get ELF header";
370 
371   return elf_header.e_type == ET_REL;
372 }
373 
IsLittleEndianBinary(Elf * elf)374 bool IsLittleEndianBinary(Elf* elf) {
375   GElf_Ehdr elf_header;
376   Check(gelf_getehdr(elf, &elf_header) != nullptr)
377       << "could not get ELF header";
378 
379   switch (auto endianness = elf_header.e_ident[EI_DATA]) {
380     case ELFDATA2LSB:
381       return true;
382     case ELFDATA2MSB:
383       return false;
384     default:
385       Die() << "Unsupported ELF endianness: " << endianness;
386   }
387 }
388 
389 }  // namespace
390 
operator <<(std::ostream & os,SymbolTableEntry::SymbolType type)391 std::ostream& operator<<(std::ostream& os, SymbolTableEntry::SymbolType type) {
392   using SymbolType = SymbolTableEntry::SymbolType;
393   switch (type) {
394     case SymbolType::NOTYPE:
395       return os << "notype";
396     case SymbolType::OBJECT:
397       return os << "object";
398     case SymbolType::FUNCTION:
399       return os << "function";
400     case SymbolType::SECTION:
401       return os << "section";
402     case SymbolType::FILE:
403       return os << "file";
404     case SymbolType::COMMON:
405       return os << "common";
406     case SymbolType::TLS:
407       return os << "TLS";
408     case SymbolType::GNU_IFUNC:
409       return os << "indirect (ifunc) function";
410   }
411 }
412 
operator <<(std::ostream & os,const SymbolTableEntry::ValueType type)413 std::ostream& operator<<(std::ostream& os,
414                          const SymbolTableEntry::ValueType type) {
415   using ValueType = SymbolTableEntry::ValueType;
416   switch (type) {
417     case ValueType::UNDEFINED:
418       return os << "undefined";
419     case ValueType::ABSOLUTE:
420       return os << "absolute";
421     case ValueType::COMMON:
422       return os << "common";
423     case ValueType::RELATIVE_TO_SECTION:
424       return os << "relative";
425   }
426 }
427 
ElfLoader(Elf & elf)428 ElfLoader::ElfLoader(Elf& elf)
429     : elf_(&elf) {
430   InitializeElfInformation();
431 }
432 
InitializeElfInformation()433 void ElfLoader::InitializeElfInformation() {
434   is_linux_kernel_binary_ = elf::IsLinuxKernelBinary(elf_);
435   is_relocatable_ = elf::IsRelocatable(elf_);
436   is_little_endian_binary_ = elf::IsLittleEndianBinary(elf_);
437 }
438 
GetSectionRawData(const char * name) const439 std::string_view ElfLoader::GetSectionRawData(const char* name) const {
440   return GetRawData(GetSectionByName(elf_, name), name);
441 }
442 
GetElfSymbols() const443 std::vector<SymbolTableEntry> ElfLoader::GetElfSymbols() const {
444   Elf_Scn* symbol_table_section =
445       GetSymbolTableSection(elf_, is_linux_kernel_binary_);
446   Check(symbol_table_section != nullptr)
447       << "failed to find symbol table section";
448 
449   return GetSymbols(elf_, symbol_table_section, /* cfi = */ false);
450 }
451 
GetCFISymbols() const452 std::vector<SymbolTableEntry> ElfLoader::GetCFISymbols() const {
453   // CFI symbols may be only in .symtab
454   Elf_Scn* symbol_table_section = MaybeGetSectionByType(elf_, SHT_SYMTAB);
455   if (symbol_table_section == nullptr) {
456     // It is possible for ET_DYN and ET_EXEC ELF binaries to not have .symtab,
457     // because it was trimmed away. We can't determine whether there were CFI
458     // symbols in the first place, so the best we can do is returning an empty
459     // list.
460     return {};
461   }
462   return GetSymbols(elf_, symbol_table_section, /* cfi = */ true);
463 }
464 
GetElfSymbolCRC(const SymbolTableEntry & symbol) const465 ElfSymbol::CRC ElfLoader::GetElfSymbolCRC(
466     const SymbolTableEntry& symbol) const {
467   Check(is_little_endian_binary_)
468       << "CRC is not supported in big-endian binaries";
469   const auto address = GetAbsoluteAddress(symbol);
470   if (symbol.value_type == SymbolTableEntry::ValueType::ABSOLUTE) {
471     return ElfSymbol::CRC{static_cast<uint32_t>(address)};
472   }
473   Check(symbol.value_type == SymbolTableEntry::ValueType::RELATIVE_TO_SECTION)
474       << "CRC symbol is expected to be absolute or relative to a section";
475 
476   const auto section = GetSectionByIndex(elf_, symbol.section_index);
477   const auto [header, data] = GetSectionInfo(section);
478   Check(data->d_buf != nullptr) << "Section has no data buffer";
479 
480   Check(address >= header.sh_addr)
481       << "CRC symbol address is below CRC section start";
482 
483   const size_t offset = address - header.sh_addr;
484   const size_t offset_end = offset + sizeof(uint32_t);
485   Check(offset_end <= data->d_size && offset_end <= header.sh_size)
486       << "CRC symbol address is above CRC section end";
487 
488   return ElfSymbol::CRC{*reinterpret_cast<uint32_t*>(
489       reinterpret_cast<char*>(data->d_buf) + offset)};
490 }
491 
GetElfSymbolNamespace(const SymbolTableEntry & symbol) const492 std::string_view ElfLoader::GetElfSymbolNamespace(
493     const SymbolTableEntry& symbol) const {
494   Check(symbol.value_type == SymbolTableEntry::ValueType::RELATIVE_TO_SECTION)
495       << "Namespace symbol is expected to be relative to a section";
496 
497   const auto section = GetSectionByIndex(elf_, symbol.section_index);
498   const auto [header, data] = GetSectionInfo(section);
499   Check(data->d_buf != nullptr) << "Section has no data buffer";
500 
501   const auto address = GetAbsoluteAddress(symbol);
502   Check(address >= header.sh_addr)
503       << "Namespace symbol address is below namespace section start";
504 
505   const size_t offset = address - header.sh_addr;
506   Check(offset < data->d_size && offset < header.sh_size)
507       << "Namespace symbol address is above namespace section end";
508 
509   const char* begin = reinterpret_cast<const char*>(data->d_buf) + offset;
510   // TODO: replace strnlen with something in a standard library
511   const size_t length = strnlen(begin, data->d_size - offset);
512   Check(offset + length < data->d_size)
513       << "Namespace string should be null-terminated";
514 
515   return {begin, length};
516 }
517 
GetAbsoluteAddress(const SymbolTableEntry & symbol) const518 size_t ElfLoader::GetAbsoluteAddress(const SymbolTableEntry& symbol) const {
519   if (symbol.value_type == SymbolTableEntry::ValueType::ABSOLUTE) {
520     return symbol.value;
521   }
522   Check(symbol.value_type == SymbolTableEntry::ValueType::RELATIVE_TO_SECTION)
523       << "Only absolute and relative to sections symbols are supported";
524   // In relocatable files, st_value holds a section offset for a defined symbol.
525   if (is_relocatable_) {
526     const auto section = GetSectionByIndex(elf_, symbol.section_index);
527     GElf_Shdr header;
528     Check(gelf_getshdr(section, &header) != nullptr)
529         << "failed to get symbol section header";
530     Check(symbol.value + symbol.size <= header.sh_size)
531         << "Symbol should be inside the section";
532     return symbol.value + header.sh_addr;
533   }
534   // In executable and shared object files, st_value holds a virtual address.
535   return symbol.value;
536 }
537 
IsLinuxKernelBinary() const538 bool ElfLoader::IsLinuxKernelBinary() const {
539   return is_linux_kernel_binary_;
540 }
541 
IsLittleEndianBinary() const542 bool ElfLoader::IsLittleEndianBinary() const {
543   return is_little_endian_binary_;
544 }
545 
546 }  // namespace elf
547 }  // namespace stg
548