xref: /aosp_15_r20/external/sandboxed-api/sandboxed_api/sandbox2/util/minielf.cc (revision ec63e07ab9515d95e79c211197c445ef84cefa6a)
1*ec63e07aSXin Li // Copyright 2019 Google LLC
2*ec63e07aSXin Li //
3*ec63e07aSXin Li // Licensed under the Apache License, Version 2.0 (the "License");
4*ec63e07aSXin Li // you may not use this file except in compliance with the License.
5*ec63e07aSXin Li // You may obtain a copy of the License at
6*ec63e07aSXin Li //
7*ec63e07aSXin Li //     https://www.apache.org/licenses/LICENSE-2.0
8*ec63e07aSXin Li //
9*ec63e07aSXin Li // Unless required by applicable law or agreed to in writing, software
10*ec63e07aSXin Li // distributed under the License is distributed on an "AS IS" BASIS,
11*ec63e07aSXin Li // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12*ec63e07aSXin Li // See the License for the specific language governing permissions and
13*ec63e07aSXin Li // limitations under the License.
14*ec63e07aSXin Li 
15*ec63e07aSXin Li #include "sandboxed_api/sandbox2/util/minielf.h"
16*ec63e07aSXin Li 
17*ec63e07aSXin Li #include <elf.h>
18*ec63e07aSXin Li 
19*ec63e07aSXin Li #include <algorithm>
20*ec63e07aSXin Li #include <cerrno>
21*ec63e07aSXin Li #include <cstddef>
22*ec63e07aSXin Li #include <cstdint>
23*ec63e07aSXin Li #include <cstdio>
24*ec63e07aSXin Li #include <cstring>
25*ec63e07aSXin Li #include <string>
26*ec63e07aSXin Li #include <type_traits>
27*ec63e07aSXin Li #include <utility>
28*ec63e07aSXin Li #include <vector>
29*ec63e07aSXin Li 
30*ec63e07aSXin Li #include "absl/base/internal/endian.h"
31*ec63e07aSXin Li #include "absl/status/status.h"
32*ec63e07aSXin Li #include "absl/status/statusor.h"
33*ec63e07aSXin Li #include "absl/strings/match.h"
34*ec63e07aSXin Li #include "absl/strings/str_cat.h"
35*ec63e07aSXin Li #include "absl/strings/string_view.h"
36*ec63e07aSXin Li #include "sandboxed_api/config.h"
37*ec63e07aSXin Li #include "sandboxed_api/sandbox2/util.h"
38*ec63e07aSXin Li #include "sandboxed_api/util/raw_logging.h"
39*ec63e07aSXin Li #include "sandboxed_api/util/status_macros.h"
40*ec63e07aSXin Li 
41*ec63e07aSXin Li namespace host_cpu = ::sapi::host_cpu;
42*ec63e07aSXin Li 
43*ec63e07aSXin Li namespace sandbox2 {
44*ec63e07aSXin Li 
45*ec63e07aSXin Li using ElfEhdr = std::conditional_t<host_cpu::Is64Bit(), Elf64_Ehdr, Elf32_Ehdr>;
46*ec63e07aSXin Li using ElfShdr = std::conditional_t<host_cpu::Is64Bit(), Elf64_Shdr, Elf32_Shdr>;
47*ec63e07aSXin Li using ElfPhdr = std::conditional_t<host_cpu::Is64Bit(), Elf64_Phdr, Elf32_Phdr>;
48*ec63e07aSXin Li using ElfDyn = std::conditional_t<host_cpu::Is64Bit(), Elf64_Dyn, Elf32_Dyn>;
49*ec63e07aSXin Li using ElfSym = std::conditional_t<host_cpu::Is64Bit(), Elf64_Sym, Elf32_Sym>;
50*ec63e07aSXin Li 
51*ec63e07aSXin Li constexpr int kElfHeaderSize = sizeof(ElfEhdr);  // Maximum size for binaries
52*ec63e07aSXin Li 
53*ec63e07aSXin Li constexpr char kElfMagic[] =
54*ec63e07aSXin Li     "\x7F"
55*ec63e07aSXin Li     "ELF";
56*ec63e07aSXin Li 
57*ec63e07aSXin Li constexpr int kEiClassOffset = 0x04;
58*ec63e07aSXin Li constexpr int kEiClass = host_cpu::Is64Bit() ? ELFCLASS64 : ELFCLASS32;
59*ec63e07aSXin Li 
60*ec63e07aSXin Li constexpr int kEiDataOffset = 0x05;
61*ec63e07aSXin Li constexpr int kEiDataLittle = 1;  // Little Endian
62*ec63e07aSXin Li constexpr int kEiDataBig = 2;     // Big Endian
63*ec63e07aSXin Li 
64*ec63e07aSXin Li constexpr int kEiVersionOffset = 0x06;
65*ec63e07aSXin Li constexpr int kEvCurrent = 1;  // ELF version
66*ec63e07aSXin Li 
67*ec63e07aSXin Li namespace {
68*ec63e07aSXin Li 
69*ec63e07aSXin Li // NOLINTNEXTLINE
CheckedFSeek(FILE * f,long offset,int whence)70*ec63e07aSXin Li absl::Status CheckedFSeek(FILE* f, long offset, int whence) {
71*ec63e07aSXin Li   if (fseek(f, offset, whence)) {
72*ec63e07aSXin Li     return absl::ErrnoToStatus(errno, "Fseek on ELF failed");
73*ec63e07aSXin Li   }
74*ec63e07aSXin Li   return absl::OkStatus();
75*ec63e07aSXin Li }
76*ec63e07aSXin Li 
CheckedFRead(void * dst,size_t size,size_t nmemb,FILE * f)77*ec63e07aSXin Li absl::Status CheckedFRead(void* dst, size_t size, size_t nmemb, FILE* f) {
78*ec63e07aSXin Li   if (std::fread(dst, size, nmemb, f) == nmemb) {
79*ec63e07aSXin Li     return absl::OkStatus();
80*ec63e07aSXin Li   }
81*ec63e07aSXin Li   return absl::ErrnoToStatus(errno, "Reading ELF data failed");
82*ec63e07aSXin Li }
83*ec63e07aSXin Li 
CheckedRead(std::string * s,FILE * f)84*ec63e07aSXin Li absl::Status CheckedRead(std::string* s, FILE* f) {
85*ec63e07aSXin Li   return CheckedFRead(&(*s)[0], 1, s->size(), f);
86*ec63e07aSXin Li }
87*ec63e07aSXin Li 
ReadName(uint32_t offset,absl::string_view strtab)88*ec63e07aSXin Li absl::string_view ReadName(uint32_t offset, absl::string_view strtab) {
89*ec63e07aSXin Li   auto name = strtab.substr(offset);
90*ec63e07aSXin Li   return name.substr(0, name.find('\0'));
91*ec63e07aSXin Li }
92*ec63e07aSXin Li 
93*ec63e07aSXin Li }  //  namespace
94*ec63e07aSXin Li 
// Loads `data_struct.member` from the raw bytes at `src`, using the member's
// offset within the struct as the offset into `src`. Dispatches to an
// overload of Load() that must be visible at the point of use.
// `src` is parenthesized so that pointer expressions (e.g. `p + n`) expand
// correctly.
#define LOAD_MEMBER(data_struct, member, src)                              \
  Load(&(data_struct).member,                                              \
       &(src)[offsetof(std::remove_reference<decltype(data_struct)>::type, \
                       member)])
99*ec63e07aSXin Li 
100*ec63e07aSXin Li class ElfParser {
101*ec63e07aSXin Li  public:
102*ec63e07aSXin Li   // Arbitrary cut-off values, so we can parse safely.
103*ec63e07aSXin Li   static constexpr int kMaxProgramHeaderEntries = 500;
104*ec63e07aSXin Li   static constexpr int kMaxSectionHeaderEntries = 500;
105*ec63e07aSXin Li   static constexpr size_t kMaxSectionSize = 500 * 1024 * 1024;
106*ec63e07aSXin Li   static constexpr size_t kMaxStrtabSize = 500 * 1024 * 1024;
107*ec63e07aSXin Li   static constexpr size_t kMaxLibPathSize = 1024;
108*ec63e07aSXin Li   static constexpr int kMaxSymbolEntries = 4 * 1000 * 1000;
109*ec63e07aSXin Li   static constexpr int kMaxDynamicEntries = 10000;
110*ec63e07aSXin Li   static constexpr size_t kMaxInterpreterSize = 1000;
111*ec63e07aSXin Li 
112*ec63e07aSXin Li   static absl::StatusOr<ElfFile> Parse(const std::string& filename,
113*ec63e07aSXin Li                                        uint32_t features);
114*ec63e07aSXin Li 
~ElfParser()115*ec63e07aSXin Li   ~ElfParser() {
116*ec63e07aSXin Li     if (elf_) {
117*ec63e07aSXin Li       std::fclose(elf_);
118*ec63e07aSXin Li     }
119*ec63e07aSXin Li   }
120*ec63e07aSXin Li 
121*ec63e07aSXin Li  private:
122*ec63e07aSXin Li   ElfParser() = default;
123*ec63e07aSXin Li 
124*ec63e07aSXin Li   // Endianess support functions
Load16(const void * src)125*ec63e07aSXin Li   uint16_t Load16(const void* src) {
126*ec63e07aSXin Li     return elf_little_ ? absl::little_endian::Load16(src)
127*ec63e07aSXin Li                        : absl::big_endian::Load16(src);
128*ec63e07aSXin Li   }
Load32(const void * src)129*ec63e07aSXin Li   uint32_t Load32(const void* src) {
130*ec63e07aSXin Li     return elf_little_ ? absl::little_endian::Load32(src)
131*ec63e07aSXin Li                        : absl::big_endian::Load32(src);
132*ec63e07aSXin Li   }
Load64(const void * src)133*ec63e07aSXin Li   uint64_t Load64(const void* src) {
134*ec63e07aSXin Li     return elf_little_ ? absl::little_endian::Load64(src)
135*ec63e07aSXin Li                        : absl::big_endian::Load64(src);
136*ec63e07aSXin Li   }
137*ec63e07aSXin Li 
138*ec63e07aSXin Li   template <size_t N>
Load(unsigned char (* dst)[N],const void * src)139*ec63e07aSXin Li   void Load(unsigned char (*dst)[N], const void* src) {
140*ec63e07aSXin Li     memcpy(dst, src, N);
141*ec63e07aSXin Li   }
142*ec63e07aSXin Li 
143*ec63e07aSXin Li   template <typename IntT>
Load(IntT * dst,const void * src)144*ec63e07aSXin Li   std::enable_if_t<std::is_integral_v<IntT>, void> Load(IntT* dst,
145*ec63e07aSXin Li                                                         const void* src) {
146*ec63e07aSXin Li     switch (sizeof(IntT)) {
147*ec63e07aSXin Li       case 1:
148*ec63e07aSXin Li         *dst = *reinterpret_cast<const char*>(src);
149*ec63e07aSXin Li         break;
150*ec63e07aSXin Li       case 2:
151*ec63e07aSXin Li         *dst = Load16(src);
152*ec63e07aSXin Li         break;
153*ec63e07aSXin Li       case 4:
154*ec63e07aSXin Li         *dst = Load32(src);
155*ec63e07aSXin Li         break;
156*ec63e07aSXin Li       case 8:
157*ec63e07aSXin Li         *dst = Load64(src);
158*ec63e07aSXin Li         break;
159*ec63e07aSXin Li     }
160*ec63e07aSXin Li   }
161*ec63e07aSXin Li 
162*ec63e07aSXin Li   // Reads ELF file size.
163*ec63e07aSXin Li   absl::Status ReadFileSize();
164*ec63e07aSXin Li   // Reads ELF header.
165*ec63e07aSXin Li   absl::Status ReadFileHeader();
166*ec63e07aSXin Li   // Reads a single ELF program header.
167*ec63e07aSXin Li   absl::StatusOr<ElfPhdr> ReadProgramHeader(absl::string_view src);
168*ec63e07aSXin Li   // Reads all ELF program headers.
169*ec63e07aSXin Li   absl::Status ReadProgramHeaders();
170*ec63e07aSXin Li   // Reads a single ELF section header.
171*ec63e07aSXin Li   absl::StatusOr<ElfShdr> ReadSectionHeader(absl::string_view src);
172*ec63e07aSXin Li   // Reads all ELF section headers.
173*ec63e07aSXin Li   absl::Status ReadSectionHeaders();
174*ec63e07aSXin Li   // Reads contents of an ELF section.
175*ec63e07aSXin Li   absl::StatusOr<std::string> ReadSectionContents(int idx);
176*ec63e07aSXin Li   absl::StatusOr<std::string> ReadSectionContents(
177*ec63e07aSXin Li       const ElfShdr& section_header);
178*ec63e07aSXin Li   // Reads all symbols from symtab section.
179*ec63e07aSXin Li   absl::Status ReadSymbolsFromSymtab(const ElfShdr& symtab);
180*ec63e07aSXin Li   // Reads all imported libraries from dynamic section.
181*ec63e07aSXin Li   absl::Status ReadImportedLibrariesFromDynamic(const ElfShdr& dynamic);
182*ec63e07aSXin Li 
183*ec63e07aSXin Li   ElfFile result_;
184*ec63e07aSXin Li   FILE* elf_ = nullptr;
185*ec63e07aSXin Li   size_t file_size_ = 0;
186*ec63e07aSXin Li   bool elf_little_ = false;
187*ec63e07aSXin Li   ElfEhdr file_header_;
188*ec63e07aSXin Li   std::vector<ElfPhdr> program_headers_;
189*ec63e07aSXin Li   std::vector<ElfShdr> section_headers_;
190*ec63e07aSXin Li 
191*ec63e07aSXin Li   int symbol_entries_read = 0;
192*ec63e07aSXin Li   int dynamic_entries_read = 0;
193*ec63e07aSXin Li };
194*ec63e07aSXin Li 
ReadFileSize()195*ec63e07aSXin Li absl::Status ElfParser::ReadFileSize() {
196*ec63e07aSXin Li   std::fseek(elf_, 0, SEEK_END);
197*ec63e07aSXin Li   file_size_ = std::ftell(elf_);
198*ec63e07aSXin Li   if (file_size_ < kElfHeaderSize) {
199*ec63e07aSXin Li     return absl::FailedPreconditionError(
200*ec63e07aSXin Li         absl::StrCat("file too small: ", file_size_, " bytes, at least ",
201*ec63e07aSXin Li                      kElfHeaderSize, " bytes expected"));
202*ec63e07aSXin Li   }
203*ec63e07aSXin Li   return absl::OkStatus();
204*ec63e07aSXin Li }
205*ec63e07aSXin Li 
ReadFileHeader()206*ec63e07aSXin Li absl::Status ElfParser::ReadFileHeader() {
207*ec63e07aSXin Li   std::string header(kElfHeaderSize, '\0');
208*ec63e07aSXin Li   SAPI_RETURN_IF_ERROR(CheckedFSeek(elf_, 0, SEEK_SET));
209*ec63e07aSXin Li   SAPI_RETURN_IF_ERROR(CheckedRead(&header, elf_));
210*ec63e07aSXin Li 
211*ec63e07aSXin Li   if (!absl::StartsWith(header, kElfMagic)) {
212*ec63e07aSXin Li     return absl::FailedPreconditionError("magic not found, not an ELF");
213*ec63e07aSXin Li   }
214*ec63e07aSXin Li 
215*ec63e07aSXin Li   if (header[kEiClassOffset] != kEiClass) {
216*ec63e07aSXin Li     return absl::FailedPreconditionError("invalid ELF class");
217*ec63e07aSXin Li   }
218*ec63e07aSXin Li   const auto elf_data = header[kEiDataOffset];
219*ec63e07aSXin Li   elf_little_ = elf_data == kEiDataLittle;
220*ec63e07aSXin Li   if (!elf_little_ && elf_data != kEiDataBig) {
221*ec63e07aSXin Li     return absl::FailedPreconditionError("invalid endianness");
222*ec63e07aSXin Li   }
223*ec63e07aSXin Li 
224*ec63e07aSXin Li   if (header[kEiVersionOffset] != kEvCurrent) {
225*ec63e07aSXin Li     return absl::FailedPreconditionError("invalid ELF version");
226*ec63e07aSXin Li   }
227*ec63e07aSXin Li   LOAD_MEMBER(file_header_, e_ident, header.data());
228*ec63e07aSXin Li   LOAD_MEMBER(file_header_, e_type, header.data());
229*ec63e07aSXin Li   LOAD_MEMBER(file_header_, e_machine, header.data());
230*ec63e07aSXin Li   LOAD_MEMBER(file_header_, e_version, header.data());
231*ec63e07aSXin Li   LOAD_MEMBER(file_header_, e_entry, header.data());
232*ec63e07aSXin Li   LOAD_MEMBER(file_header_, e_phoff, header.data());
233*ec63e07aSXin Li   LOAD_MEMBER(file_header_, e_shoff, header.data());
234*ec63e07aSXin Li   LOAD_MEMBER(file_header_, e_flags, header.data());
235*ec63e07aSXin Li   LOAD_MEMBER(file_header_, e_ehsize, header.data());
236*ec63e07aSXin Li   LOAD_MEMBER(file_header_, e_phentsize, header.data());
237*ec63e07aSXin Li   LOAD_MEMBER(file_header_, e_phnum, header.data());
238*ec63e07aSXin Li   LOAD_MEMBER(file_header_, e_shentsize, header.data());
239*ec63e07aSXin Li   LOAD_MEMBER(file_header_, e_shnum, header.data());
240*ec63e07aSXin Li   LOAD_MEMBER(file_header_, e_shstrndx, header.data());
241*ec63e07aSXin Li   return absl::OkStatus();
242*ec63e07aSXin Li }
243*ec63e07aSXin Li 
ReadSectionHeader(absl::string_view src)244*ec63e07aSXin Li absl::StatusOr<ElfShdr> ElfParser::ReadSectionHeader(absl::string_view src) {
245*ec63e07aSXin Li   if (src.size() < sizeof(ElfShdr)) {
246*ec63e07aSXin Li     return absl::FailedPreconditionError(
247*ec63e07aSXin Li         absl::StrCat("invalid section header data: got ", src.size(),
248*ec63e07aSXin Li                      " bytes, ", sizeof(ElfShdr), " bytes expected."));
249*ec63e07aSXin Li   }
250*ec63e07aSXin Li   ElfShdr rv;
251*ec63e07aSXin Li   LOAD_MEMBER(rv, sh_name, src.data());
252*ec63e07aSXin Li   LOAD_MEMBER(rv, sh_type, src.data());
253*ec63e07aSXin Li   LOAD_MEMBER(rv, sh_flags, src.data());
254*ec63e07aSXin Li   LOAD_MEMBER(rv, sh_addr, src.data());
255*ec63e07aSXin Li   LOAD_MEMBER(rv, sh_offset, src.data());
256*ec63e07aSXin Li   LOAD_MEMBER(rv, sh_size, src.data());
257*ec63e07aSXin Li   LOAD_MEMBER(rv, sh_link, src.data());
258*ec63e07aSXin Li   LOAD_MEMBER(rv, sh_info, src.data());
259*ec63e07aSXin Li   LOAD_MEMBER(rv, sh_addralign, src.data());
260*ec63e07aSXin Li   LOAD_MEMBER(rv, sh_entsize, src.data());
261*ec63e07aSXin Li   return rv;
262*ec63e07aSXin Li }
263*ec63e07aSXin Li 
ReadSectionHeaders()264*ec63e07aSXin Li absl::Status ElfParser::ReadSectionHeaders() {
265*ec63e07aSXin Li   if (file_header_.e_shoff > file_size_) {
266*ec63e07aSXin Li     return absl::FailedPreconditionError(
267*ec63e07aSXin Li         absl::StrCat("invalid section header offset: ", file_header_.e_shoff));
268*ec63e07aSXin Li   }
269*ec63e07aSXin Li   if (file_header_.e_shentsize != sizeof(ElfShdr)) {
270*ec63e07aSXin Li     return absl::FailedPreconditionError(absl::StrCat(
271*ec63e07aSXin Li         "section header entry size incorrect: ", file_header_.e_shentsize,
272*ec63e07aSXin Li         " bytes, ", sizeof(ElfShdr), " expected."));
273*ec63e07aSXin Li   }
274*ec63e07aSXin Li   if (file_header_.e_shnum > kMaxSectionHeaderEntries) {
275*ec63e07aSXin Li     return absl::FailedPreconditionError(
276*ec63e07aSXin Li         absl::StrCat("too many section header entries: ", file_header_.e_shnum,
277*ec63e07aSXin Li                      " limit: ", kMaxSectionHeaderEntries));
278*ec63e07aSXin Li   }
279*ec63e07aSXin Li   std::string headers(file_header_.e_shentsize * file_header_.e_shnum, '\0');
280*ec63e07aSXin Li   SAPI_RETURN_IF_ERROR(CheckedFSeek(elf_, file_header_.e_shoff, SEEK_SET));
281*ec63e07aSXin Li   SAPI_RETURN_IF_ERROR(CheckedRead(&headers, elf_));
282*ec63e07aSXin Li   section_headers_.resize(file_header_.e_shnum);
283*ec63e07aSXin Li   absl::string_view src = headers;
284*ec63e07aSXin Li   for (int i = 0; i < file_header_.e_shnum; ++i) {
285*ec63e07aSXin Li     SAPI_ASSIGN_OR_RETURN(section_headers_[i], ReadSectionHeader(src));
286*ec63e07aSXin Li     src = src.substr(file_header_.e_shentsize);
287*ec63e07aSXin Li   }
288*ec63e07aSXin Li   return absl::OkStatus();
289*ec63e07aSXin Li }
290*ec63e07aSXin Li 
ReadSectionContents(int idx)291*ec63e07aSXin Li absl::StatusOr<std::string> ElfParser::ReadSectionContents(int idx) {
292*ec63e07aSXin Li   if (idx < 0 || idx >= section_headers_.size()) {
293*ec63e07aSXin Li     return absl::FailedPreconditionError(
294*ec63e07aSXin Li         absl::StrCat("invalid section header index: ", idx));
295*ec63e07aSXin Li   }
296*ec63e07aSXin Li   return ReadSectionContents(section_headers_.at(idx));
297*ec63e07aSXin Li }
298*ec63e07aSXin Li 
ReadSectionContents(const ElfShdr & section_header)299*ec63e07aSXin Li absl::StatusOr<std::string> ElfParser::ReadSectionContents(
300*ec63e07aSXin Li     const ElfShdr& section_header) {
301*ec63e07aSXin Li   auto offset = section_header.sh_offset;
302*ec63e07aSXin Li   if (offset > file_size_) {
303*ec63e07aSXin Li     return absl::FailedPreconditionError(
304*ec63e07aSXin Li         absl::StrCat("invalid section offset: ", offset));
305*ec63e07aSXin Li   }
306*ec63e07aSXin Li   auto size = section_header.sh_size;
307*ec63e07aSXin Li   if (size > kMaxSectionSize) {
308*ec63e07aSXin Li     return absl::FailedPreconditionError(
309*ec63e07aSXin Li         absl::StrCat("section too big: ", size, " limit: ", kMaxSectionSize));
310*ec63e07aSXin Li   }
311*ec63e07aSXin Li   std::string rv(size, '\0');
312*ec63e07aSXin Li   SAPI_RETURN_IF_ERROR(CheckedFSeek(elf_, offset, SEEK_SET));
313*ec63e07aSXin Li   SAPI_RETURN_IF_ERROR(CheckedRead(&rv, elf_));
314*ec63e07aSXin Li   return rv;
315*ec63e07aSXin Li }
316*ec63e07aSXin Li 
ReadProgramHeader(absl::string_view src)317*ec63e07aSXin Li absl::StatusOr<ElfPhdr> ElfParser::ReadProgramHeader(absl::string_view src) {
318*ec63e07aSXin Li   if (src.size() < sizeof(ElfPhdr)) {
319*ec63e07aSXin Li     return absl::FailedPreconditionError(
320*ec63e07aSXin Li         absl::StrCat("invalid program header data: got ", src.size(),
321*ec63e07aSXin Li                      " bytes, ", sizeof(ElfPhdr), " bytes expected."));
322*ec63e07aSXin Li   }
323*ec63e07aSXin Li   ElfPhdr rv;
324*ec63e07aSXin Li   LOAD_MEMBER(rv, p_type, src.data());
325*ec63e07aSXin Li   LOAD_MEMBER(rv, p_flags, src.data());
326*ec63e07aSXin Li   LOAD_MEMBER(rv, p_offset, src.data());
327*ec63e07aSXin Li   LOAD_MEMBER(rv, p_vaddr, src.data());
328*ec63e07aSXin Li   LOAD_MEMBER(rv, p_paddr, src.data());
329*ec63e07aSXin Li   LOAD_MEMBER(rv, p_filesz, src.data());
330*ec63e07aSXin Li   LOAD_MEMBER(rv, p_memsz, src.data());
331*ec63e07aSXin Li   LOAD_MEMBER(rv, p_align, src.data());
332*ec63e07aSXin Li   return rv;
333*ec63e07aSXin Li }
334*ec63e07aSXin Li 
ReadProgramHeaders()335*ec63e07aSXin Li absl::Status ElfParser::ReadProgramHeaders() {
336*ec63e07aSXin Li   if (file_header_.e_phoff > file_size_) {
337*ec63e07aSXin Li     return absl::FailedPreconditionError(
338*ec63e07aSXin Li         absl::StrCat("invalid program header offset: ", file_header_.e_phoff));
339*ec63e07aSXin Li   }
340*ec63e07aSXin Li   if (file_header_.e_phentsize != sizeof(ElfPhdr)) {
341*ec63e07aSXin Li     return absl::FailedPreconditionError(absl::StrCat(
342*ec63e07aSXin Li         "section header entry size incorrect: ", file_header_.e_phentsize,
343*ec63e07aSXin Li         " bytes, ", sizeof(ElfPhdr), " expected."));
344*ec63e07aSXin Li   }
345*ec63e07aSXin Li   if (file_header_.e_phnum > kMaxProgramHeaderEntries) {
346*ec63e07aSXin Li     return absl::FailedPreconditionError(
347*ec63e07aSXin Li         absl::StrCat("too many program header entries: ", file_header_.e_phnum,
348*ec63e07aSXin Li                      " limit: ", kMaxProgramHeaderEntries));
349*ec63e07aSXin Li   }
350*ec63e07aSXin Li   std::string headers(file_header_.e_phentsize * file_header_.e_phnum, '\0');
351*ec63e07aSXin Li   SAPI_RETURN_IF_ERROR(CheckedFSeek(elf_, file_header_.e_phoff, SEEK_SET));
352*ec63e07aSXin Li   SAPI_RETURN_IF_ERROR(CheckedRead(&headers, elf_));
353*ec63e07aSXin Li   program_headers_.resize(file_header_.e_phnum);
354*ec63e07aSXin Li   absl::string_view src = headers;
355*ec63e07aSXin Li   for (int i = 0; i < file_header_.e_phnum; ++i) {
356*ec63e07aSXin Li     SAPI_ASSIGN_OR_RETURN(program_headers_[i], ReadProgramHeader(src));
357*ec63e07aSXin Li     src = src.substr(file_header_.e_phentsize);
358*ec63e07aSXin Li   }
359*ec63e07aSXin Li   return absl::OkStatus();
360*ec63e07aSXin Li }
361*ec63e07aSXin Li 
ReadSymbolsFromSymtab(const ElfShdr & symtab)362*ec63e07aSXin Li absl::Status ElfParser::ReadSymbolsFromSymtab(const ElfShdr& symtab) {
363*ec63e07aSXin Li   if (symtab.sh_type != SHT_SYMTAB) {
364*ec63e07aSXin Li     return absl::FailedPreconditionError("invalid symtab type");
365*ec63e07aSXin Li   }
366*ec63e07aSXin Li   if (symtab.sh_entsize != sizeof(ElfSym)) {
367*ec63e07aSXin Li     return absl::InternalError(
368*ec63e07aSXin Li         absl::StrCat("invalid symbol entry size: ", symtab.sh_entsize));
369*ec63e07aSXin Li   }
370*ec63e07aSXin Li   if ((symtab.sh_size % symtab.sh_entsize) != 0) {
371*ec63e07aSXin Li     return absl::InternalError(
372*ec63e07aSXin Li         absl::StrCat("invalid symbol table size: ", symtab.sh_size));
373*ec63e07aSXin Li   }
374*ec63e07aSXin Li   size_t symbol_entries = symtab.sh_size / symtab.sh_entsize;
375*ec63e07aSXin Li   if (symbol_entries > kMaxSymbolEntries - symbol_entries_read) {
376*ec63e07aSXin Li     return absl::InternalError(
377*ec63e07aSXin Li         absl::StrCat("too many symbols: ", symbol_entries));
378*ec63e07aSXin Li   }
379*ec63e07aSXin Li   symbol_entries_read += symbol_entries;
380*ec63e07aSXin Li   if (symtab.sh_link >= section_headers_.size()) {
381*ec63e07aSXin Li     return absl::InternalError(
382*ec63e07aSXin Li         absl::StrCat("invalid symtab's strtab reference: ", symtab.sh_link));
383*ec63e07aSXin Li   }
384*ec63e07aSXin Li   SAPI_RAW_VLOG(1, "Symbol table with %zu entries found", symbol_entries);
385*ec63e07aSXin Li   SAPI_ASSIGN_OR_RETURN(std::string strtab,
386*ec63e07aSXin Li                         ReadSectionContents(symtab.sh_link));
387*ec63e07aSXin Li   SAPI_ASSIGN_OR_RETURN(std::string symbols, ReadSectionContents(symtab));
388*ec63e07aSXin Li   result_.symbols_.reserve(result_.symbols_.size() + symbol_entries);
389*ec63e07aSXin Li   for (absl::string_view src = symbols; !src.empty();
390*ec63e07aSXin Li        src = src.substr(symtab.sh_entsize)) {
391*ec63e07aSXin Li     ElfSym symbol;
392*ec63e07aSXin Li     LOAD_MEMBER(symbol, st_name, src.data());
393*ec63e07aSXin Li     LOAD_MEMBER(symbol, st_info, src.data());
394*ec63e07aSXin Li     LOAD_MEMBER(symbol, st_other, src.data());
395*ec63e07aSXin Li     LOAD_MEMBER(symbol, st_shndx, src.data());
396*ec63e07aSXin Li     LOAD_MEMBER(symbol, st_value, src.data());
397*ec63e07aSXin Li     LOAD_MEMBER(symbol, st_size, src.data());
398*ec63e07aSXin Li     if (symbol.st_shndx == SHN_UNDEF) {
399*ec63e07aSXin Li       // External symbol, not supported.
400*ec63e07aSXin Li       continue;
401*ec63e07aSXin Li     }
402*ec63e07aSXin Li     if (symbol.st_shndx == SHN_ABS) {
403*ec63e07aSXin Li       // Absolute value, not supported.
404*ec63e07aSXin Li       continue;
405*ec63e07aSXin Li     }
406*ec63e07aSXin Li     if (symbol.st_shndx >= section_headers_.size()) {
407*ec63e07aSXin Li       return absl::FailedPreconditionError(absl::StrCat(
408*ec63e07aSXin Li           "invalid symbol data: section index: ", symbol.st_shndx));
409*ec63e07aSXin Li     }
410*ec63e07aSXin Li     if (symbol.st_name >= strtab.size()) {
411*ec63e07aSXin Li       return absl::FailedPreconditionError(
412*ec63e07aSXin Li           absl::StrCat("invalid name reference: REL", symbol.st_value));
413*ec63e07aSXin Li     }
414*ec63e07aSXin Li     result_.symbols_.push_back(
415*ec63e07aSXin Li         {symbol.st_value, std::string(ReadName(symbol.st_name, strtab))});
416*ec63e07aSXin Li   }
417*ec63e07aSXin Li   return absl::OkStatus();
418*ec63e07aSXin Li }
419*ec63e07aSXin Li 
ReadImportedLibrariesFromDynamic(const ElfShdr & dynamic)420*ec63e07aSXin Li absl::Status ElfParser::ReadImportedLibrariesFromDynamic(
421*ec63e07aSXin Li     const ElfShdr& dynamic) {
422*ec63e07aSXin Li   if (dynamic.sh_type != SHT_DYNAMIC) {
423*ec63e07aSXin Li     return absl::FailedPreconditionError("invalid dynamic type");
424*ec63e07aSXin Li   }
425*ec63e07aSXin Li   if (dynamic.sh_entsize != sizeof(ElfDyn)) {
426*ec63e07aSXin Li     return absl::InternalError(
427*ec63e07aSXin Li         absl::StrCat("invalid dynamic entry size: ", dynamic.sh_entsize));
428*ec63e07aSXin Li   }
429*ec63e07aSXin Li   if ((dynamic.sh_size % dynamic.sh_entsize) != 0) {
430*ec63e07aSXin Li     return absl::InternalError(
431*ec63e07aSXin Li         absl::StrCat("invalid dynamic table size: ", dynamic.sh_size));
432*ec63e07aSXin Li   }
433*ec63e07aSXin Li   size_t entries = dynamic.sh_size / dynamic.sh_entsize;
434*ec63e07aSXin Li   if (entries > kMaxDynamicEntries - dynamic_entries_read) {
435*ec63e07aSXin Li     return absl::InternalError(
436*ec63e07aSXin Li         absl::StrCat("too many dynamic entries: ", entries));
437*ec63e07aSXin Li   }
438*ec63e07aSXin Li   dynamic_entries_read += entries;
439*ec63e07aSXin Li   if (dynamic.sh_link >= section_headers_.size()) {
440*ec63e07aSXin Li     return absl::InternalError(
441*ec63e07aSXin Li         absl::StrCat("invalid dynamic's strtab reference: ", dynamic.sh_link));
442*ec63e07aSXin Li   }
443*ec63e07aSXin Li   SAPI_RAW_VLOG(1, "Dynamic section with %zu entries found", entries);
444*ec63e07aSXin Li   // strtab may be shared with symbols and therefore huge
445*ec63e07aSXin Li   const auto& strtab_section = section_headers_.at(dynamic.sh_link);
446*ec63e07aSXin Li   if (strtab_section.sh_offset > file_size_) {
447*ec63e07aSXin Li     return absl::FailedPreconditionError(absl::StrCat(
448*ec63e07aSXin Li         "invalid symtab's strtab section offset: ", strtab_section.sh_offset));
449*ec63e07aSXin Li   }
450*ec63e07aSXin Li   if (strtab_section.sh_size >= kMaxStrtabSize ||
451*ec63e07aSXin Li       strtab_section.sh_size >= file_size_ ||
452*ec63e07aSXin Li       strtab_section.sh_offset >= file_size_ - strtab_section.sh_size) {
453*ec63e07aSXin Li     return absl::FailedPreconditionError(
454*ec63e07aSXin Li         absl::StrCat("symtab's strtab too big: ", strtab_section.sh_size));
455*ec63e07aSXin Li   }
456*ec63e07aSXin Li   auto strtab_end = strtab_section.sh_offset + strtab_section.sh_size;
457*ec63e07aSXin Li   SAPI_ASSIGN_OR_RETURN(std::string dynamic_entries,
458*ec63e07aSXin Li                         ReadSectionContents(dynamic));
459*ec63e07aSXin Li   for (absl::string_view src = dynamic_entries; !src.empty();
460*ec63e07aSXin Li        src = src.substr(dynamic.sh_entsize)) {
461*ec63e07aSXin Li     ElfDyn dyn;
462*ec63e07aSXin Li     LOAD_MEMBER(dyn, d_tag, src.data());
463*ec63e07aSXin Li     LOAD_MEMBER(dyn, d_un.d_val, src.data());
464*ec63e07aSXin Li     if (dyn.d_tag != DT_NEEDED) {
465*ec63e07aSXin Li       continue;
466*ec63e07aSXin Li     }
467*ec63e07aSXin Li     if (dyn.d_un.d_val >= strtab_section.sh_size) {
468*ec63e07aSXin Li       return absl::FailedPreconditionError(
469*ec63e07aSXin Li           absl::StrCat("invalid name reference"));
470*ec63e07aSXin Li     }
471*ec63e07aSXin Li     auto offset = strtab_section.sh_offset + dyn.d_un.d_val;
472*ec63e07aSXin Li     SAPI_RETURN_IF_ERROR(CheckedFSeek(elf_, offset, SEEK_SET));
473*ec63e07aSXin Li     std::string path(
474*ec63e07aSXin Li         std::min(kMaxLibPathSize, static_cast<size_t>(strtab_end - offset)),
475*ec63e07aSXin Li         '\0');
476*ec63e07aSXin Li     size_t size = std::fread(&path[0], 1, path.size(), elf_);
477*ec63e07aSXin Li     path.resize(size);
478*ec63e07aSXin Li     result_.imported_libraries_.push_back(path.substr(0, path.find('\0')));
479*ec63e07aSXin Li   }
480*ec63e07aSXin Li   return absl::OkStatus();
481*ec63e07aSXin Li }
482*ec63e07aSXin Li 
Parse(const std::string & filename,uint32_t features)483*ec63e07aSXin Li absl::StatusOr<ElfFile> ElfParser::Parse(const std::string& filename,
484*ec63e07aSXin Li                                          uint32_t features) {
485*ec63e07aSXin Li   ElfParser parser;
486*ec63e07aSXin Li   if (parser.elf_ = std::fopen(filename.c_str(), "r"); !parser.elf_) {
487*ec63e07aSXin Li     return absl::ErrnoToStatus(errno,
488*ec63e07aSXin Li                                absl::StrCat("cannot open file: ", filename));
489*ec63e07aSXin Li   }
490*ec63e07aSXin Li 
491*ec63e07aSXin Li   // Basic sanity check.
492*ec63e07aSXin Li   if (features & ~(ElfFile::kAll)) {
493*ec63e07aSXin Li     return absl::InvalidArgumentError("Unknown feature flags specified");
494*ec63e07aSXin Li   }
495*ec63e07aSXin Li   SAPI_RETURN_IF_ERROR(parser.ReadFileSize());
496*ec63e07aSXin Li   SAPI_RETURN_IF_ERROR(parser.ReadFileHeader());
497*ec63e07aSXin Li   switch (parser.file_header_.e_type) {
498*ec63e07aSXin Li     case ET_EXEC:
499*ec63e07aSXin Li       parser.result_.position_independent_ = false;
500*ec63e07aSXin Li       break;
501*ec63e07aSXin Li     case ET_DYN:
502*ec63e07aSXin Li       parser.result_.position_independent_ = true;
503*ec63e07aSXin Li       break;
504*ec63e07aSXin Li     default:
505*ec63e07aSXin Li       return absl::FailedPreconditionError("not an executable: ");
506*ec63e07aSXin Li   }
507*ec63e07aSXin Li   if (features & ElfFile::kGetInterpreter) {
508*ec63e07aSXin Li     SAPI_RETURN_IF_ERROR(parser.ReadProgramHeaders());
509*ec63e07aSXin Li     std::string interpreter;
510*ec63e07aSXin Li     auto it = std::find_if(
511*ec63e07aSXin Li         parser.program_headers_.begin(), parser.program_headers_.end(),
512*ec63e07aSXin Li         [](const ElfPhdr& hdr) { return hdr.p_type == PT_INTERP; });
513*ec63e07aSXin Li     // No interpreter usually means that the executable was statically linked.
514*ec63e07aSXin Li     if (it != parser.program_headers_.end()) {
515*ec63e07aSXin Li       if (it->p_filesz > kMaxInterpreterSize) {
516*ec63e07aSXin Li         return absl::FailedPreconditionError(
517*ec63e07aSXin Li             absl::StrCat("program interpeter path too long: ", it->p_filesz));
518*ec63e07aSXin Li       }
519*ec63e07aSXin Li       SAPI_RETURN_IF_ERROR(CheckedFSeek(parser.elf_, it->p_offset, SEEK_SET));
520*ec63e07aSXin Li       interpreter.resize(it->p_filesz, '\0');
521*ec63e07aSXin Li       SAPI_RETURN_IF_ERROR(CheckedRead(&interpreter, parser.elf_));
522*ec63e07aSXin Li       auto first_nul = interpreter.find_first_of('\0');
523*ec63e07aSXin Li       if (first_nul != std::string::npos) {
524*ec63e07aSXin Li         interpreter.erase(first_nul);
525*ec63e07aSXin Li       }
526*ec63e07aSXin Li     }
527*ec63e07aSXin Li     parser.result_.interpreter_ = std::move(interpreter);
528*ec63e07aSXin Li   }
529*ec63e07aSXin Li 
530*ec63e07aSXin Li   if (features & (ElfFile::kLoadSymbols | ElfFile::kLoadImportedLibraries)) {
531*ec63e07aSXin Li     SAPI_RETURN_IF_ERROR(parser.ReadSectionHeaders());
532*ec63e07aSXin Li     for (const auto& hdr : parser.section_headers_) {
533*ec63e07aSXin Li       if (hdr.sh_type == SHT_SYMTAB && features & ElfFile::kLoadSymbols) {
534*ec63e07aSXin Li         SAPI_RETURN_IF_ERROR(parser.ReadSymbolsFromSymtab(hdr));
535*ec63e07aSXin Li       }
536*ec63e07aSXin Li       if (hdr.sh_type == SHT_DYNAMIC &&
537*ec63e07aSXin Li           features & ElfFile::kLoadImportedLibraries) {
538*ec63e07aSXin Li         SAPI_RETURN_IF_ERROR(parser.ReadImportedLibrariesFromDynamic(hdr));
539*ec63e07aSXin Li       }
540*ec63e07aSXin Li     }
541*ec63e07aSXin Li   }
542*ec63e07aSXin Li 
543*ec63e07aSXin Li   return std::move(parser.result_);
544*ec63e07aSXin Li }
545*ec63e07aSXin Li 
ParseFromFile(const std::string & filename,uint32_t features)546*ec63e07aSXin Li absl::StatusOr<ElfFile> ElfFile::ParseFromFile(const std::string& filename,
547*ec63e07aSXin Li                                                uint32_t features) {
548*ec63e07aSXin Li   return ElfParser::Parse(filename, features);
549*ec63e07aSXin Li }
550*ec63e07aSXin Li 
551*ec63e07aSXin Li }  // namespace sandbox2
552