1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- mode: C++ -*-
3 //
4 // Copyright 2020-2022 Google LLC
5 //
6 // Licensed under the Apache License v2.0 with LLVM Exceptions (the
7 // "License"); you may not use this file except in compliance with the
8 // License. You may obtain a copy of the License at
9 //
10 // https://llvm.org/LICENSE.txt
11 //
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 // Author: Maria Teguiani
19 // Author: Giuliano Procida
20 // Author: Aleksei Vetrov
21
22 #include "elf_loader.h"
23
24 #include <elf.h>
25 #include <gelf.h>
26 #include <libelf.h>
27
28 #include <cstddef>
29 #include <cstdint>
30 #include <cstring>
31 #include <functional>
32 #include <limits>
33 #include <ostream>
34 #include <string>
35 #include <string_view>
36 #include <vector>
37
38 #include "error.h"
39 #include "graph.h"
40
41 namespace stg {
42 namespace elf {
43
44 namespace {
45
ParseSymbolType(unsigned char symbol_type)46 SymbolTableEntry::SymbolType ParseSymbolType(unsigned char symbol_type) {
47 switch (symbol_type) {
48 case STT_NOTYPE:
49 return SymbolTableEntry::SymbolType::NOTYPE;
50 case STT_OBJECT:
51 return SymbolTableEntry::SymbolType::OBJECT;
52 case STT_FUNC:
53 return SymbolTableEntry::SymbolType::FUNCTION;
54 case STT_SECTION:
55 return SymbolTableEntry::SymbolType::SECTION;
56 case STT_FILE:
57 return SymbolTableEntry::SymbolType::FILE;
58 case STT_COMMON:
59 return SymbolTableEntry::SymbolType::COMMON;
60 case STT_TLS:
61 return SymbolTableEntry::SymbolType::TLS;
62 case STT_GNU_IFUNC:
63 return SymbolTableEntry::SymbolType::GNU_IFUNC;
64 default:
65 Die() << "Unknown ELF symbol type: " << symbol_type;
66 }
67 }
68
ParseSymbolBinding(unsigned char binding)69 SymbolTableEntry::Binding ParseSymbolBinding(unsigned char binding) {
70 switch (binding) {
71 case STB_LOCAL:
72 return SymbolTableEntry::Binding::LOCAL;
73 case STB_GLOBAL:
74 return SymbolTableEntry::Binding::GLOBAL;
75 case STB_WEAK:
76 return SymbolTableEntry::Binding::WEAK;
77 case STB_GNU_UNIQUE:
78 return SymbolTableEntry::Binding::GNU_UNIQUE;
79 default:
80 Die() << "Unknown ELF symbol binding: " << binding;
81 }
82 }
83
ParseSymbolVisibility(unsigned char visibility)84 SymbolTableEntry::Visibility ParseSymbolVisibility(unsigned char visibility) {
85 switch (visibility) {
86 case STV_DEFAULT:
87 return SymbolTableEntry::Visibility::DEFAULT;
88 case STV_INTERNAL:
89 return SymbolTableEntry::Visibility::INTERNAL;
90 case STV_HIDDEN:
91 return SymbolTableEntry::Visibility::HIDDEN;
92 case STV_PROTECTED:
93 return SymbolTableEntry::Visibility::PROTECTED;
94 default:
95 Die() << "Unknown ELF symbol visibility: " << visibility;
96 }
97 }
98
ParseSymbolValueType(Elf64_Section section_index)99 SymbolTableEntry::ValueType ParseSymbolValueType(Elf64_Section section_index) {
100 switch (section_index) {
101 case SHN_UNDEF:
102 return SymbolTableEntry::ValueType::UNDEFINED;
103 case SHN_ABS:
104 return SymbolTableEntry::ValueType::ABSOLUTE;
105 case SHN_COMMON:
106 return SymbolTableEntry::ValueType::COMMON;
107 default:
108 return SymbolTableEntry::ValueType::RELATIVE_TO_SECTION;
109 }
110 }
111
ElfHeaderTypeToString(unsigned char elf_header_type)112 std::string ElfHeaderTypeToString(unsigned char elf_header_type) {
113 switch (elf_header_type) {
114 case ET_NONE:
115 return "none";
116 case ET_REL:
117 return "relocatable";
118 case ET_EXEC:
119 return "executable";
120 case ET_DYN:
121 return "shared object";
122 case ET_CORE:
123 return "coredump";
124 default:
125 return "unknown (type = " + std::to_string(elf_header_type) + ')';
126 }
127 }
128
ElfSectionTypeToString(Elf64_Word elf_section_type)129 std::string ElfSectionTypeToString(Elf64_Word elf_section_type) {
130 switch (elf_section_type) {
131 case SHT_SYMTAB:
132 return "symtab";
133 case SHT_DYNSYM:
134 return "dynsym";
135 case SHT_GNU_verdef:
136 return "GNU_verdef";
137 case SHT_GNU_verneed:
138 return "GNU_verneed";
139 case SHT_GNU_versym:
140 return "GNU_versym";
141 default:
142 return "unknown (type = " + std::to_string(elf_section_type) + ')';
143 }
144 }
145
GetMachine(Elf * elf)146 GElf_Half GetMachine(Elf* elf) {
147 GElf_Ehdr header;
148 Check(gelf_getehdr(elf, &header) != nullptr) << "could not get ELF header";
149 return header.e_machine;
150 }
151
AdjustAddress(GElf_Half machine,SymbolTableEntry & entry)152 void AdjustAddress(GElf_Half machine, SymbolTableEntry& entry) {
153 if (machine == EM_ARM) {
154 if (entry.symbol_type == SymbolTableEntry::SymbolType::FUNCTION
155 || entry.symbol_type == SymbolTableEntry::SymbolType::GNU_IFUNC) {
156 // Clear bit zero of ARM32 addresses as per "ELF for the Arm Architecture"
157 // section 5.5.3. https://static.docs.arm.com/ihi0044/g/aaelf32.pdf
158 entry.value &= ~1;
159 }
160 } else if (machine == EM_AARCH64) {
161 // Copy bit 55 over bits 56 to 63 which may be tag information.
162 entry.value = entry.value & (1ULL << 55)
163 ? entry.value | (0xffULL << 56)
164 : entry.value & ~(0xffULL << 56);
165 }
166 }
167
GetSectionsIf(Elf * elf,const std::function<bool (const GElf_Shdr &)> & predicate)168 std::vector<Elf_Scn*> GetSectionsIf(
169 Elf* elf, const std::function<bool(const GElf_Shdr&)>& predicate) {
170 std::vector<Elf_Scn*> result;
171 Elf_Scn* section = nullptr;
172 GElf_Shdr header;
173 while ((section = elf_nextscn(elf, section)) != nullptr) {
174 Check(gelf_getshdr(section, &header) != nullptr)
175 << "could not get ELF section header";
176 if (predicate(header)) {
177 result.push_back(section);
178 }
179 }
180 return result;
181 }
182
GetSectionsByName(Elf * elf,const std::string & name)183 std::vector<Elf_Scn*> GetSectionsByName(Elf* elf, const std::string& name) {
184 size_t shdr_strtab_index;
185 Check(elf_getshdrstrndx(elf, &shdr_strtab_index) == 0)
186 << "could not get ELF section header string table index";
187 return GetSectionsIf(elf, [&](const GElf_Shdr& header) {
188 const auto* section_name =
189 elf_strptr(elf, shdr_strtab_index, header.sh_name);
190 return section_name != nullptr && section_name == name;
191 });
192 }
193
MaybeGetSectionByName(Elf * elf,const std::string & name)194 Elf_Scn* MaybeGetSectionByName(Elf* elf, const std::string& name) {
195 const auto sections = GetSectionsByName(elf, name);
196 if (sections.empty()) {
197 return nullptr;
198 }
199 Check(sections.size() == 1)
200 << "multiple sections found with name '" << name << "'";
201 return sections[0];
202 }
203
GetSectionByName(Elf * elf,const std::string & name)204 Elf_Scn* GetSectionByName(Elf* elf, const std::string& name) {
205 Elf_Scn* section = MaybeGetSectionByName(elf, name);
206 Check(section != nullptr) << "no section found with name '" << name << "'";
207 return section;
208 }
209
MaybeGetSectionByType(Elf * elf,Elf64_Word type)210 Elf_Scn* MaybeGetSectionByType(Elf* elf, Elf64_Word type) {
211 auto sections = GetSectionsIf(
212 elf, [&](const GElf_Shdr& header) { return header.sh_type == type; });
213 if (sections.empty()) {
214 return nullptr;
215 }
216 Check(sections.size() == 1) << "multiple sections found with type " << type;
217 return sections[0];
218 }
219
GetSectionByIndex(Elf * elf,size_t index)220 Elf_Scn* GetSectionByIndex(Elf* elf, size_t index) {
221 Elf_Scn* section = elf_getscn(elf, index);
222 Check(section != nullptr) << "no section found with index " << index;
223 return section;
224 }
225
226 struct SectionInfo {
227 GElf_Shdr header;
228 Elf_Data* data;
229 };
230
GetSectionInfo(Elf_Scn * section)231 SectionInfo GetSectionInfo(Elf_Scn* section) {
232 const size_t index = elf_ndxscn(section);
233 GElf_Shdr section_header;
234 Check(gelf_getshdr(section, §ion_header) != nullptr)
235 << "failed to read section (index = " << index << ") header";
236 Elf_Data* data = elf_getdata(section, nullptr);
237 Check(data != nullptr) << "section (index = " << index << ") data is invalid";
238 return {section_header, data};
239 }
240
GetNumberOfEntries(const GElf_Shdr & section_header)241 size_t GetNumberOfEntries(const GElf_Shdr& section_header) {
242 Check(section_header.sh_entsize != 0)
243 << "zero table entity size is unexpected for section "
244 << ElfSectionTypeToString(section_header.sh_type);
245 return section_header.sh_size / section_header.sh_entsize;
246 }
247
GetRawData(Elf_Scn * section,const char * name)248 std::string_view GetRawData(Elf_Scn* section, const char* name) {
249 Elf_Data* data = elf_rawdata(section, nullptr);
250 Check(data != nullptr) << "elf_rawdata failed on section " << name;
251 return {static_cast<char*>(data->d_buf), data->d_size};
252 }
253
GetString(Elf * elf,uint32_t section,size_t offset)254 std::string_view GetString(Elf* elf, uint32_t section, size_t offset) {
255 const auto name = elf_strptr(elf, section, offset);
256
257 Check(name != nullptr) << "string was not found (section: " << section
258 << ", offset: " << offset << ")";
259 return name;
260 }
261
GetSymbolTableSection(Elf * elf,bool is_linux_kernel_binary)262 Elf_Scn* GetSymbolTableSection(Elf* elf, bool is_linux_kernel_binary) {
263 GElf_Ehdr elf_header;
264 Check(gelf_getehdr(elf, &elf_header) != nullptr)
265 << "could not get ELF header";
266
267 Elf_Scn* symtab = MaybeGetSectionByType(elf, SHT_SYMTAB);
268 Elf_Scn* dynsym = MaybeGetSectionByType(elf, SHT_DYNSYM);
269 if (symtab != nullptr && dynsym != nullptr) {
270 // Relocatable ELF binaries, Linux kernel and modules have their
271 // exported symbols in .symtab, all other ELF types have their
272 // exported symbols in .dynsym.
273 if (elf_header.e_type == ET_REL || is_linux_kernel_binary) {
274 return symtab;
275 }
276 if (elf_header.e_type == ET_DYN || elf_header.e_type == ET_EXEC) {
277 return dynsym;
278 }
279 Die() << "unsupported ELF type: '"
280 << ElfHeaderTypeToString(elf_header.e_type) << "'";
281 } else if (symtab != nullptr) {
282 return symtab;
283 } else if (dynsym != nullptr) {
284 return dynsym;
285 } else {
286 Die() << "no ELF symbol table found";
287 }
288 }
289
290
// Suffix that marks a symbol name as a CFI symbol.
constexpr std::string_view kCFISuffix = ".cfi";

// Returns true iff `name` ends with kCFISuffix.
bool IsCFISymbolName(std::string_view name) {
  const auto suffix_length = kCFISuffix.size();
  if (name.size() < suffix_length) {
    return false;
  }
  return name.substr(name.size() - suffix_length) == kCFISuffix;
}
296
297 } // namespace
298
UnwrapCFISymbolName(std::string_view cfi_name)299 std::string_view UnwrapCFISymbolName(std::string_view cfi_name) {
300 Check(IsCFISymbolName(cfi_name))
301 << "CFI symbol " << cfi_name << " doesn't end with " << kCFISuffix;
302 return cfi_name.substr(0, cfi_name.size() - kCFISuffix.size());
303 }
304
305 namespace {
306
GetSymbols(Elf * elf,Elf_Scn * symbol_table_section,bool cfi)307 std::vector<SymbolTableEntry> GetSymbols(
308 Elf* elf, Elf_Scn* symbol_table_section, bool cfi) {
309 const auto machine = GetMachine(elf);
310 const auto [symbol_table_header, symbol_table_data] =
311 GetSectionInfo(symbol_table_section);
312 const size_t number_of_symbols = GetNumberOfEntries(symbol_table_header);
313
314 std::vector<SymbolTableEntry> result;
315 result.reserve(number_of_symbols);
316
317 // GElf uses int for indexes in symbol table, prevent int overflow.
318 Check(number_of_symbols <= std::numeric_limits<int>::max())
319 << "number of symbols exceeds INT_MAX";
320 for (size_t i = 0; i < number_of_symbols; ++i) {
321 GElf_Sym symbol;
322 Check(gelf_getsym(symbol_table_data, static_cast<int>(i), &symbol) !=
323 nullptr)
324 << "symbol (i = " << i << ") was not found";
325
326 const auto name =
327 GetString(elf, symbol_table_header.sh_link, symbol.st_name);
328 if (cfi != IsCFISymbolName(name)) {
329 continue;
330 }
331 SymbolTableEntry entry{
332 .name = name,
333 .value = symbol.st_value,
334 .size = symbol.st_size,
335 .symbol_type = ParseSymbolType(GELF_ST_TYPE(symbol.st_info)),
336 .binding = ParseSymbolBinding(GELF_ST_BIND(symbol.st_info)),
337 .visibility =
338 ParseSymbolVisibility(GELF_ST_VISIBILITY(symbol.st_other)),
339 .section_index = symbol.st_shndx,
340 .value_type = ParseSymbolValueType(symbol.st_shndx),
341 };
342 AdjustAddress(machine, entry);
343 result.push_back(entry);
344 }
345
346 return result;
347 }
348
IsLinuxKernelBinary(Elf * elf)349 bool IsLinuxKernelBinary(Elf* elf) {
350 // The Linux kernel itself has many specific sections that are sufficient to
351 // classify a binary as kernel binary if present, `__ksymtab_strings` is one
352 // of them. It is present if a kernel binary (vmlinux or a module) exports
353 // symbols via the EXPORT_SYMBOL_* macros and it contains symbol names and
354 // namespaces which form part of the ABI.
355 //
356 // Kernel modules might not present a `__ksymtab_strings` section if they do
357 // not export symbols themselves via the ksymtab. Yet they can be identified
358 // by the presence of the `.modinfo` section. Since that is somewhat a generic
359 // name, also check for the presence of `.gnu.linkonce.this_module` to get
360 // solid signal as both of those sections are present in kernel modules.
361 return MaybeGetSectionByName(elf, "__ksymtab_strings") != nullptr ||
362 (MaybeGetSectionByName(elf, ".modinfo") != nullptr &&
363 MaybeGetSectionByName(elf, ".gnu.linkonce.this_module") != nullptr);
364 }
365
IsRelocatable(Elf * elf)366 bool IsRelocatable(Elf* elf) {
367 GElf_Ehdr elf_header;
368 Check(gelf_getehdr(elf, &elf_header) != nullptr)
369 << "could not get ELF header";
370
371 return elf_header.e_type == ET_REL;
372 }
373
IsLittleEndianBinary(Elf * elf)374 bool IsLittleEndianBinary(Elf* elf) {
375 GElf_Ehdr elf_header;
376 Check(gelf_getehdr(elf, &elf_header) != nullptr)
377 << "could not get ELF header";
378
379 switch (auto endianness = elf_header.e_ident[EI_DATA]) {
380 case ELFDATA2LSB:
381 return true;
382 case ELFDATA2MSB:
383 return false;
384 default:
385 Die() << "Unsupported ELF endianness: " << endianness;
386 }
387 }
388
389 } // namespace
390
operator <<(std::ostream & os,SymbolTableEntry::SymbolType type)391 std::ostream& operator<<(std::ostream& os, SymbolTableEntry::SymbolType type) {
392 using SymbolType = SymbolTableEntry::SymbolType;
393 switch (type) {
394 case SymbolType::NOTYPE:
395 return os << "notype";
396 case SymbolType::OBJECT:
397 return os << "object";
398 case SymbolType::FUNCTION:
399 return os << "function";
400 case SymbolType::SECTION:
401 return os << "section";
402 case SymbolType::FILE:
403 return os << "file";
404 case SymbolType::COMMON:
405 return os << "common";
406 case SymbolType::TLS:
407 return os << "TLS";
408 case SymbolType::GNU_IFUNC:
409 return os << "indirect (ifunc) function";
410 }
411 }
412
operator <<(std::ostream & os,const SymbolTableEntry::ValueType type)413 std::ostream& operator<<(std::ostream& os,
414 const SymbolTableEntry::ValueType type) {
415 using ValueType = SymbolTableEntry::ValueType;
416 switch (type) {
417 case ValueType::UNDEFINED:
418 return os << "undefined";
419 case ValueType::ABSOLUTE:
420 return os << "absolute";
421 case ValueType::COMMON:
422 return os << "common";
423 case ValueType::RELATIVE_TO_SECTION:
424 return os << "relative";
425 }
426 }
427
ElfLoader(Elf & elf)428 ElfLoader::ElfLoader(Elf& elf)
429 : elf_(&elf) {
430 InitializeElfInformation();
431 }
432
InitializeElfInformation()433 void ElfLoader::InitializeElfInformation() {
434 is_linux_kernel_binary_ = elf::IsLinuxKernelBinary(elf_);
435 is_relocatable_ = elf::IsRelocatable(elf_);
436 is_little_endian_binary_ = elf::IsLittleEndianBinary(elf_);
437 }
438
GetSectionRawData(const char * name) const439 std::string_view ElfLoader::GetSectionRawData(const char* name) const {
440 return GetRawData(GetSectionByName(elf_, name), name);
441 }
442
GetElfSymbols() const443 std::vector<SymbolTableEntry> ElfLoader::GetElfSymbols() const {
444 Elf_Scn* symbol_table_section =
445 GetSymbolTableSection(elf_, is_linux_kernel_binary_);
446 Check(symbol_table_section != nullptr)
447 << "failed to find symbol table section";
448
449 return GetSymbols(elf_, symbol_table_section, /* cfi = */ false);
450 }
451
GetCFISymbols() const452 std::vector<SymbolTableEntry> ElfLoader::GetCFISymbols() const {
453 // CFI symbols may be only in .symtab
454 Elf_Scn* symbol_table_section = MaybeGetSectionByType(elf_, SHT_SYMTAB);
455 if (symbol_table_section == nullptr) {
456 // It is possible for ET_DYN and ET_EXEC ELF binaries to not have .symtab,
457 // because it was trimmed away. We can't determine whether there were CFI
458 // symbols in the first place, so the best we can do is returning an empty
459 // list.
460 return {};
461 }
462 return GetSymbols(elf_, symbol_table_section, /* cfi = */ true);
463 }
464
GetElfSymbolCRC(const SymbolTableEntry & symbol) const465 ElfSymbol::CRC ElfLoader::GetElfSymbolCRC(
466 const SymbolTableEntry& symbol) const {
467 Check(is_little_endian_binary_)
468 << "CRC is not supported in big-endian binaries";
469 const auto address = GetAbsoluteAddress(symbol);
470 if (symbol.value_type == SymbolTableEntry::ValueType::ABSOLUTE) {
471 return ElfSymbol::CRC{static_cast<uint32_t>(address)};
472 }
473 Check(symbol.value_type == SymbolTableEntry::ValueType::RELATIVE_TO_SECTION)
474 << "CRC symbol is expected to be absolute or relative to a section";
475
476 const auto section = GetSectionByIndex(elf_, symbol.section_index);
477 const auto [header, data] = GetSectionInfo(section);
478 Check(data->d_buf != nullptr) << "Section has no data buffer";
479
480 Check(address >= header.sh_addr)
481 << "CRC symbol address is below CRC section start";
482
483 const size_t offset = address - header.sh_addr;
484 const size_t offset_end = offset + sizeof(uint32_t);
485 Check(offset_end <= data->d_size && offset_end <= header.sh_size)
486 << "CRC symbol address is above CRC section end";
487
488 return ElfSymbol::CRC{*reinterpret_cast<uint32_t*>(
489 reinterpret_cast<char*>(data->d_buf) + offset)};
490 }
491
GetElfSymbolNamespace(const SymbolTableEntry & symbol) const492 std::string_view ElfLoader::GetElfSymbolNamespace(
493 const SymbolTableEntry& symbol) const {
494 Check(symbol.value_type == SymbolTableEntry::ValueType::RELATIVE_TO_SECTION)
495 << "Namespace symbol is expected to be relative to a section";
496
497 const auto section = GetSectionByIndex(elf_, symbol.section_index);
498 const auto [header, data] = GetSectionInfo(section);
499 Check(data->d_buf != nullptr) << "Section has no data buffer";
500
501 const auto address = GetAbsoluteAddress(symbol);
502 Check(address >= header.sh_addr)
503 << "Namespace symbol address is below namespace section start";
504
505 const size_t offset = address - header.sh_addr;
506 Check(offset < data->d_size && offset < header.sh_size)
507 << "Namespace symbol address is above namespace section end";
508
509 const char* begin = reinterpret_cast<const char*>(data->d_buf) + offset;
510 // TODO: replace strnlen with something in a standard library
511 const size_t length = strnlen(begin, data->d_size - offset);
512 Check(offset + length < data->d_size)
513 << "Namespace string should be null-terminated";
514
515 return {begin, length};
516 }
517
GetAbsoluteAddress(const SymbolTableEntry & symbol) const518 size_t ElfLoader::GetAbsoluteAddress(const SymbolTableEntry& symbol) const {
519 if (symbol.value_type == SymbolTableEntry::ValueType::ABSOLUTE) {
520 return symbol.value;
521 }
522 Check(symbol.value_type == SymbolTableEntry::ValueType::RELATIVE_TO_SECTION)
523 << "Only absolute and relative to sections symbols are supported";
524 // In relocatable files, st_value holds a section offset for a defined symbol.
525 if (is_relocatable_) {
526 const auto section = GetSectionByIndex(elf_, symbol.section_index);
527 GElf_Shdr header;
528 Check(gelf_getshdr(section, &header) != nullptr)
529 << "failed to get symbol section header";
530 Check(symbol.value + symbol.size <= header.sh_size)
531 << "Symbol should be inside the section";
532 return symbol.value + header.sh_addr;
533 }
534 // In executable and shared object files, st_value holds a virtual address.
535 return symbol.value;
536 }
537
IsLinuxKernelBinary() const538 bool ElfLoader::IsLinuxKernelBinary() const {
539 return is_linux_kernel_binary_;
540 }
541
IsLittleEndianBinary() const542 bool ElfLoader::IsLittleEndianBinary() const {
543 return is_little_endian_binary_;
544 }
545
546 } // namespace elf
547 } // namespace stg
548