#!/usr/bin/env python3
# Copyright 2020 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Reads data from ELF sections.

This module provides tools for dumping the contents of an ELF section. It can
also be used to read values at a particular address. A command line interface
for both of these features is provided.

This module supports any ELF-format file, including .o and .so files. This
module also has basic support for archive (.a) files. All ELF files in an
archive are read as one unit.
"""

from __future__ import annotations

import argparse
import collections
from pathlib import Path
import re
import struct
import sys
from typing import (
    BinaryIO,
    Iterable,
    Mapping,
    NamedTuple,
    Pattern,
)

ARCHIVE_MAGIC = b'!<arch>\n'
ELF_MAGIC = b'\x7fELF'


def _check_next_bytes(fd: BinaryIO, expected: bytes, what: str) -> None:
    """Consumes len(expected) bytes from fd and checks they equal expected.

    Args:
      fd: binary file object positioned at the bytes to check
      expected: the bytes that must come next
      what: description of the bytes, used in the error message

    Raises:
      FileDecodeError: the bytes that were read differ from expected
    """
    actual = fd.read(len(expected))
    if expected != actual:
        raise FileDecodeError(
            f'Invalid {what}: expected {expected!r}, found {actual!r} in file '
            f'{getattr(fd, "name", "(unknown)")}'
        )


def files_in_archive(fd: BinaryIO) -> Iterable[int]:
    """Seeks to each file in an archive and yields its size.

    When a size is yielded, fd is positioned at the first byte of that file's
    contents. The caller may read from fd; the position is restored before
    advancing to the next file.

    Raises:
      FileDecodeError: the archive magic number, a file size, or a header
          ending is invalid
    """

    _check_next_bytes(fd, ARCHIVE_MAGIC, 'archive magic number')

    while True:
        # In some archives, the first file ends with an additional \n. If that
        # is present, skip it. Only rewind if a byte was actually consumed;
        # at the end of the file there is nothing to put back.
        padding = fd.read(1)
        if padding and padding != b'\n':
            fd.seek(-1, 1)

        # Each file in an archive is prefixed with an ASCII header:
        #
        #   16 B - file identifier (text)
        #   12 B - file modification timestamp (decimal)
        #    6 B - owner ID (decimal)
        #    6 B - group ID (decimal)
        #    8 B - file mode (octal)
        #   10 B - file size in bytes (decimal)
        #    2 B - ending characters (`\n)
        #
        # Skip the unused portions of the file header, then read the size.
        fd.seek(16 + 12 + 6 + 6 + 8, 1)
        size_str = fd.read(10)
        if not size_str:
            return  # Reached the end of the archive.

        try:
            size = int(size_str, 10)
        except ValueError as exc:
            raise FileDecodeError(
                'Archive file sizes must be decimal integers'
            ) from exc

        _check_next_bytes(fd, b'`\n', 'archive file header ending')
        offset = fd.tell()  # Store offset in case the caller reads the file.

        yield size

        fd.seek(offset + size)


def _elf_files_in_archive(fd: BinaryIO):
    """Yields once for each ELF file found at or after the current position.

    If fd is positioned at an ELF file, yields a single time. Otherwise, fd is
    treated as an archive and a value is yielded for each archived file that
    starts with the ELF magic number. At each yield, fd is positioned at the
    start of the ELF file.
    """
    if _bytes_match(fd, ELF_MAGIC):
        yield  # The value isn't used, so just yield None.
    else:
        for _ in files_in_archive(fd):
            if _bytes_match(fd, ELF_MAGIC):
                yield


class Field(NamedTuple):
    """A field in an ELF file.

    Fields refer to a particular piece of data in an ELF file or section header.
    Offsets and sizes are given in bytes for both 32-bit and 64-bit ELF files.
    """

    name: str
    offset_32: int
    offset_64: int
    size_32: int
    size_64: int


class _FileHeader(NamedTuple):
    """Fields in the ELF file header."""

    section_header_offset: Field = Field('e_shoff', 0x20, 0x28, 4, 8)
    section_count: Field = Field('e_shnum', 0x30, 0x3C, 2, 2)
    section_names_index: Field = Field('e_shstrndx', 0x32, 0x3E, 2, 2)


FILE_HEADER = _FileHeader()


class _SectionHeader(NamedTuple):
    """Fields in an ELF section header."""

    section_name_offset: Field = Field('sh_name', 0x00, 0x00, 4, 4)
    section_address: Field = Field('sh_addr', 0x0C, 0x10, 4, 8)
    section_offset: Field = Field('sh_offset', 0x10, 0x18, 4, 8)
    section_size: Field = Field('sh_size', 0x14, 0x20, 4, 8)

    # section_header_end records the size of the header.
    section_header_end: Field = Field('section end', 0x28, 0x40, 0, 0)


SECTION_HEADER = _SectionHeader()


def read_c_string(fd: BinaryIO) -> bytes:
    """Reads a null-terminated string from the provided file descriptor.

    Reading stops at the first null byte or at the end of the file. The
    terminator is consumed but is not included in the returned bytes.
    """
    string = bytearray()
    while True:
        byte = fd.read(1)
        if not byte or byte == b'\0':
            return bytes(string)
        string += byte


def _bytes_match(fd: BinaryIO, expected: bytes) -> bool:
    """Peeks at the next bytes to see if they match the expected.

    The file position is restored after reading. Returns False if an IOError
    occurs.
    """
    try:
        offset = fd.tell()
        data = fd.read(len(expected))
        fd.seek(offset)
        return data == expected
    except IOError:
        return False


def compatible_file(file: BinaryIO | str | Path) -> bool:
    """True if the file type is supported (ELF or archive).

    Args:
      file: a path to open, or an already-open binary file object. An open
          file object is left open with its position restored.

    Raises:
      OSError: file is a path that could not be opened
    """
    if isinstance(file, (str, Path)):
        # Open outside of any cleanup scope so that a failure to open the file
        # propagates as the original OSError, and let the with statement
        # close the file when the check completes.
        with open(file, 'rb') as fd:
            return compatible_file(fd)

    offset = file.tell()
    file.seek(0)
    result = _bytes_match(file, ELF_MAGIC) or _bytes_match(file, ARCHIVE_MAGIC)
    file.seek(offset)

    return result


class FileDecodeError(Exception):
    """Invalid data was read from an ELF file."""


class FieldReader:
    """Reads ELF fields defined with a Field tuple from an ELF file."""

    def __init__(self, elf: BinaryIO):
        """Reads the start of the ELF header to determine how to decode fields.

        Args:
          elf: binary file object positioned at the start of an ELF file

        Raises:
          FileDecodeError: the ELF magic number, address size, or endianness
              is invalid
        """
        self._elf = elf
        self.file_offset = self._elf.tell()

        _check_next_bytes(self._elf, ELF_MAGIC, 'ELF file header')
        size_field = self._elf.read(1)  # e_ident[EI_CLASS] (address size)

        # The endianness byte immediately follows the address size byte.
        int_unpacker = self._determine_integer_format()

        if size_field == b'\x01':
            self.offset = lambda field: field.offset_32
            self._size = lambda field: field.size_32
            self._decode = lambda f, d: int_unpacker[f.size_32].unpack(d)[0]
        elif size_field == b'\x02':
            self.offset = lambda field: field.offset_64
            self._size = lambda field: field.size_64
            self._decode = lambda f, d: int_unpacker[f.size_64].unpack(d)[0]
        else:
            raise FileDecodeError(f'Unknown size {size_field!r}')

    def _determine_integer_format(self) -> Mapping[int, struct.Struct]:
        """Returns a dict of structs used for converting bytes to integers.

        The dict maps an integer size in bytes to a struct.Struct that decodes
        an unsigned integer of that size with the file's endianness.
        """
        endianness_byte = self._elf.read(1)  # e_ident[EI_DATA] (endianness)
        if endianness_byte == b'\x01':
            endianness = '<'
        elif endianness_byte == b'\x02':
            endianness = '>'
        else:
            raise FileDecodeError(f'Unknown endianness {endianness_byte!r}')

        return {
            1: struct.Struct(endianness + 'B'),
            2: struct.Struct(endianness + 'H'),
            4: struct.Struct(endianness + 'I'),
            8: struct.Struct(endianness + 'Q'),
        }

    def read(self, field: Field, base: int = 0) -> int:
        """Reads the integer value of a field.

        Args:
          field: the field to read
          base: offset of the structure containing the field, relative to the
              start of the ELF file
        """
        self._elf.seek(self.file_offset + base + self.offset(field))
        data = self._elf.read(self._size(field))
        return self._decode(field, data)

    def read_string(self, offset: int) -> str:
        """Reads the null-terminated string at offset within the ELF file."""
        self._elf.seek(self.file_offset + offset)
        return read_c_string(self._elf).decode()


class Elf:
    """Represents an ELF file and the sections in it."""

    class Section(NamedTuple):
        """Info about a section in an ELF file."""

        name: str
        address: int
        offset: int
        size: int

        file_offset: int  # Starting place in the file; 0 unless in an archive.

        def range(self) -> range:
            """Returns the range of addresses covered by this section."""
            return range(self.address, self.address + self.size)

        def __lt__(self, other) -> bool:
            # Order sections by address only, rather than comparing as tuples.
            return self.address < other.address

    def __init__(self, elf: BinaryIO):
        """Reads the section headers of every ELF file found in elf.

        Args:
          elf: binary file object positioned at an ELF file or an archive; it
              must stay open for as long as this object is used to read data
        """
        self._elf = elf
        self.sections: tuple[Elf.Section, ...] = tuple(self._list_sections())

    def _list_sections(self) -> Iterable[Elf.Section]:
        """Reads the section headers to enumerate all ELF sections."""
        for _ in _elf_files_in_archive(self._elf):
            reader = FieldReader(self._elf)
            base = reader.read(FILE_HEADER.section_header_offset)
            section_header_size = reader.offset(
                SECTION_HEADER.section_header_end
            )

            # Find the section with the section names in it.
            names_section_header_base = (
                base
                + section_header_size
                * reader.read(FILE_HEADER.section_names_index)
            )
            names_table_base = reader.read(
                SECTION_HEADER.section_offset, names_section_header_base
            )

            # base still refers to the first section header; advance it by
            # one header for each section.
            for _ in range(reader.read(FILE_HEADER.section_count)):
                name_offset = reader.read(
                    SECTION_HEADER.section_name_offset, base
                )

                yield self.Section(
                    reader.read_string(names_table_base + name_offset),
                    reader.read(SECTION_HEADER.section_address, base),
                    reader.read(SECTION_HEADER.section_offset, base),
                    reader.read(SECTION_HEADER.section_size, base),
                    reader.file_offset,
                )

                base += section_header_size

    def section_by_address(self, address: int) -> Elf.Section | None:
        """Returns the section that contains the provided address, if any."""
        # Iterate in reverse to give priority to sections with nonzero addresses
        for section in sorted(self.sections, reverse=True):
            if address in section.range():
                return section

        return None

    def sections_with_name(self, name: str) -> Iterable[Elf.Section]:
        """Yields each section whose name is exactly equal to name."""
        for section in self.sections:
            if section.name == name:
                yield section

    def read_value(
        self, address: int, size: int | None = None
    ) -> None | bytes | int:
        """Reads specified bytes or null-terminated string at address.

        Args:
          address: the address to read from
          size: number of bytes to read; if None, reads up to a null terminator

        Returns:
          The bytes that were read, or None if no section contains address.
        """
        section = self.section_by_address(address)
        if not section:
            return None

        assert section.address <= address
        self._elf.seek(
            section.file_offset + section.offset + address - section.address
        )

        if size is None:
            return read_c_string(self._elf)

        return self._elf.read(size)

    def dump_sections(self, name: str | Pattern[str]) -> Mapping[str, bytes]:
        """Returns a mapping of section names to section contents.

        Sections are selected by matching the regular expression against the
        start of the section name.

        If processing an archive with multiple object files, the contents of
        sections with duplicate names are concatenated in the order they appear
        in the archive.
        """
        name_regex = re.compile(name)

        sections: Mapping[str, bytearray] = collections.defaultdict(bytearray)
        for section in self.sections:
            if name_regex.match(section.name):
                self._elf.seek(section.file_offset + section.offset)
                sections[section.name].extend(self._elf.read(section.size))

        return sections

    def dump_section_contents(self, name: str | Pattern[str]) -> bytes | None:
        """Dumps a binary string containing the sections matching the regex.

        If processing an archive with multiple object files, the contents of
        sections with duplicate names are concatenated in the order they appear
        in the archive.

        Returns None if no sections match.
        """
        sections = self.dump_sections(name)
        return b''.join(sections.values()) if sections else None

    def summary(self) -> str:
        """Returns a table with the index, address, offset, size, and name of
        each section, one section per line."""
        return '\n'.join(
            f'[{i:2}] {section.address:08x} {section.offset:08x} '
            f'{section.size:08x} {section.name}'
            for i, section in enumerate(self.sections)
        )

    def __str__(self) -> str:
        entries = ''.join(f'\n {section},' for section in self.sections)
        return f'Elf({entries}\n)'


def _read_addresses(elf, size: int, output, address: Iterable[int]) -> None:
    """Reads the value at each address and passes it to output.

    Raises:
      ValueError: an address is not within any section
    """
    for addr in address:
        value = elf.read_value(addr, size)

        if value is None:
            raise ValueError(f'Invalid address 0x{addr:08x}')

        output(value)


def _dump_sections(elf: Elf, output, sections: Iterable[Pattern[str]]) -> None:
    """Outputs the contents of the matching sections.

    If no section patterns are provided, outputs a summary of all sections.
    """
    if not sections:
        output(elf.summary().encode())
        return

    for section_pattern in sections:
        output(elf.dump_section_contents(section_pattern))


def _parse_args() -> argparse.Namespace:
    """Parses and returns command line arguments."""
    parser = argparse.ArgumentParser(description=__doc__)

    def hex_int(arg):
        return int(arg, 16)

    parser.add_argument(
        '-e',
        '--elf',
        type=argparse.FileType('rb'),
        help='the ELF file to examine',
        required=True,
    )

    parser.add_argument(
        '-d',
        '--delimiter',
        default=ord('\n'),
        type=int,
        help=r'delimiter to write after each value; \n by default',
    )

    # Print the help text if no subcommand is selected.
    parser.set_defaults(handler=lambda **_: parser.print_help())

    subparsers = parser.add_subparsers(
        help='select whether to work with addresses or whole sections'
    )

    section_parser = subparsers.add_parser('section')
    section_parser.set_defaults(handler=_dump_sections)
    section_parser.add_argument(
        'sections',
        metavar='section_regex',
        nargs='*',
        type=re.compile,  # type: ignore
        help='section name regular expression',
    )

    address_parser = subparsers.add_parser('address')
    address_parser.set_defaults(handler=_read_addresses)
    address_parser.add_argument(
        '--size',
        type=int,
        help='the size to read; reads until a null terminator by default',
    )
    address_parser.add_argument(
        'address', nargs='+', type=hex_int, help='hexadecimal addresses to read'
    )

    return parser.parse_args()


def _main(args):
    """Calls the appropriate handler for the command line options."""
    # Remove the options consumed here so that the remaining arguments can be
    # forwarded to the handler as keyword arguments.
    handler = args.handler
    del args.handler

    delim = args.delimiter
    del args.delimiter

    def output(value):
        # Writes the value (if any) followed by the delimiter byte to stdout.
        if value is not None:
            sys.stdout.buffer.write(value)
        sys.stdout.buffer.write(bytearray([delim]))
        sys.stdout.flush()

    args.output = output
    args.elf = Elf(args.elf)

    handler(**vars(args))


if __name__ == '__main__':
    _main(_parse_args())