xref: /aosp_15_r20/external/pigweed/pw_tokenizer/py/elf_reader_test.py (revision 61c4878ac05f98d0ceed94b57d316916de578985)
1#!/usr/bin/env python3
2# Copyright 2020 The Pigweed Authors
3#
4# Licensed under the Apache License, Version 2.0 (the "License"); you may not
5# use this file except in compliance with the License. You may obtain a copy of
6# the License at
7#
8#     https://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13# License for the specific language governing permissions and limitations under
14# the License.
15"""Tests the ELF reader Python module."""
16
17import io
18import os
19import re
20import unittest
21
22from pw_tokenizer import elf_reader
23
# Output from the following command:
#
#   readelf -WS elf_reader_test_binary.elf
#
# The text is stored here and parsed by the test below rather than produced by
# running readelf. The test asserts that exactly 33 section rows are parsed
# from it, so that count must be kept in sync if this text is ever refreshed.
TEST_READELF_OUTPUT = """
There are 33 section headers, starting at offset 0x1758:

Section Headers:
  [Nr] Name              Type            Address          Off    Size   ES Flg Lk Inf Al
  [ 0]                   NULL            0000000000000000 000000 000000 00      0   0  0
  [ 1] .interp           PROGBITS        0000000000000238 000238 00001c 00   A  0   0  1
  [ 2] .note.ABI-tag     NOTE            0000000000000254 000254 000020 00   A  0   0  4
  [ 3] .note.gnu.build-id NOTE            0000000000000274 000274 000024 00   A  0   0  4
  [ 4] .dynsym           DYNSYM          0000000000000298 000298 0000a8 18   A  5   1  8
  [ 5] .dynstr           STRTAB          0000000000000340 000340 00009b 00   A  0   0  1
  [ 6] .gnu.hash         GNU_HASH        00000000000003e0 0003e0 00001c 00   A  4   0  8
  [ 7] .gnu.version      VERSYM          00000000000003fc 0003fc 00000e 02   A  4   0  2
  [ 8] .gnu.version_r    VERNEED         000000000000040c 00040c 000020 00   A  5   1  4
  [ 9] .rela.dyn         RELA            0000000000000430 000430 0000d8 18   A  4   0  8
  [10] .rela.plt         RELA            0000000000000508 000508 000018 18  AI  4  12  8
  [11] .init             PROGBITS        0000000000000520 000520 000017 00  AX  0   0  4
  [12] .plt              PROGBITS        0000000000000540 000540 000020 10  AX  0   0 16
  [13] .text             PROGBITS        0000000000000560 000560 000151 00  AX  0   0 16
  [14] .fini             PROGBITS        00000000000006b4 0006b4 000009 00  AX  0   0  4
  [15] .rodata           PROGBITS        00000000000006c0 0006c0 000004 04  AM  0   0  4
  [16] .test_section_1   PROGBITS        00000000000006d0 0006d0 000010 00   A  0   0 16
  [17] .test_section_2   PROGBITS        00000000000006e0 0006e0 000004 00   A  0   0  4
  [18] .eh_frame         X86_64_UNWIND   00000000000006e8 0006e8 0000d4 00   A  0   0  8
  [19] .eh_frame_hdr     X86_64_UNWIND   00000000000007bc 0007bc 00002c 00   A  0   0  4
  [20] .fini_array       FINI_ARRAY      0000000000001d80 000d80 000008 08  WA  0   0  8
  [21] .init_array       INIT_ARRAY      0000000000001d88 000d88 000008 08  WA  0   0  8
  [22] .dynamic          DYNAMIC         0000000000001d90 000d90 000220 10  WA  5   0  8
  [23] .got              PROGBITS        0000000000001fb0 000fb0 000030 00  WA  0   0  8
  [24] .got.plt          PROGBITS        0000000000001fe0 000fe0 000020 00  WA  0   0  8
  [25] .data             PROGBITS        0000000000002000 001000 000010 00  WA  0   0  8
  [26] .tm_clone_table   PROGBITS        0000000000002010 001010 000000 00  WA  0   0  8
  [27] .bss              NOBITS          0000000000002010 001010 000001 00  WA  0   0  1
  [28] .comment          PROGBITS        0000000000000000 001010 00001d 01  MS  0   0  1
  [29] .note.gnu.gold-version NOTE            0000000000000000 001030 00001c 00      0   0  4
  [30] .symtab           SYMTAB          0000000000000000 001050 000390 18     31  21  8
  [31] .strtab           STRTAB          0000000000000000 0013e0 000227 00      0   0  1
  [32] .shstrtab         STRTAB          0000000000000000 001607 00014a 00      0   0  1
Key to Flags:
  W (write), A (alloc), X (execute), M (merge), S (strings), I (info),
  L (link order), O (extra OS processing required), G (group), T (TLS),
  C (compressed), x (unknown), o (OS specific), E (exclude),
  l (large), p (processor specific)
"""
72
# Path to the ELF fixture, resolved relative to the directory that contains
# this test module.
TEST_ELF_PATH = os.path.join(
    os.path.dirname(__file__), 'elf_reader_test_binary.elf'
)
76
77
class ElfReaderTest(unittest.TestCase):
    """Tests the elf_reader.Elf class.

    Each test gets a fresh elf_reader.Elf built from the file at
    TEST_ELF_PATH; the underlying file object is also kept so tests can pass
    it to the module-level helpers directly.
    """

    def setUp(self) -> None:
        super().setUp()
        self._elf_file = open(TEST_ELF_PATH, 'rb')
        # Register the close right away: tearDown() is not run when setUp()
        # raises, so a failure while constructing elf_reader.Elf below would
        # otherwise leak the open file handle.
        self.addCleanup(self._elf_file.close)
        self._elf = elf_reader.Elf(self._elf_file)

    def _section(self, name: str) -> elf_reader.Elf.Section:
        """Returns the first section named `name`.

        Raises:
          self.failureException: if the ELF has no section with that name, so
            the test is reported as a failure with a readable message instead
            of erroring out with a bare StopIteration.
        """
        for section in self._elf.sections_with_name(name):
            return section

        raise self.failureException(f'No section named {name!r} in the ELF')

    def test_readelf_comparison_using_the_readelf_binary(self) -> None:
        """Compares elf_reader to the stored readelf output."""

        # Matches one row of the "Section Headers" table. The name group is
        # optional because the NULL section (index 0) has an empty name.
        parse_readelf_output = re.compile(
            r'\s+'
            r'\[\s*(?P<number>\d+)\]\s+'
            r'(?P<name>\.\S*)?\s+'
            r'(?P<type>\S+)\s+'
            r'(?P<addr>[0-9a-fA-F]+)\s+'
            r'(?P<offset>[0-9a-fA-F]+)\s+'
            r'(?P<size>[0-9a-fA-F]+)\s+'
        )

        # readelf prints the index in decimal and addr/offset/size in hex.
        readelf_sections = [
            (
                int(number),
                name or '',
                int(addr, 16),
                int(offset, 16),
                int(size, 16),
            )
            for number, name, _, addr, offset, size in (
                parse_readelf_output.findall(TEST_READELF_OUTPUT)
            )
        ]

        # The length checks guarantee the zip() below does not silently drop
        # sections from either side.
        self.assertEqual(len(readelf_sections), 33)
        self.assertEqual(len(readelf_sections), len(self._elf.sections))

        for index, (section, readelf_section) in enumerate(
            zip(self._elf.sections, readelf_sections)
        ):
            readelf_index, name, address, offset, size = readelf_section

            # subTest reports which section mismatched and keeps checking the
            # remaining sections instead of stopping at the first failure.
            with self.subTest(index=index, section=name):
                self.assertEqual(index, readelf_index)
                self.assertEqual(section.name, name)
                self.assertEqual(section.address, address)
                self.assertEqual(section.offset, offset)
                self.assertEqual(section.size, size)

    def test_dump_single_section(self) -> None:
        """Dumps each test section on its own and checks the exact bytes."""
        self.assertEqual(
            self._elf.dump_section_contents(r'\.test_section_1'),
            b'You cannot pass\0',
        )
        self.assertEqual(
            self._elf.dump_section_contents(r'\.test_section_2'),
            b'\xef\xbe\xed\xfe',
        )

    def test_dump_multiple_sections(self) -> None:
        """Dumps both test sections with one regex and checks the result."""
        # The expected concatenation order follows the sections' addresses.
        if (
            self._section('.test_section_1').address
            < self._section('.test_section_2').address
        ):
            contents = b'You cannot pass\0\xef\xbe\xed\xfe'
        else:
            contents = b'\xef\xbe\xed\xfeYou cannot pass\0'

        # Compare for equality: a substring check (assertIn) would also accept
        # an empty or partial dump. The leading dot is escaped so the pattern
        # only matches the literal section-name prefix.
        self.assertEqual(
            self._elf.dump_section_contents(r'\.test_section_\d'), contents
        )

    def test_read_values(self) -> None:
        """Reads a C string and a 4-byte value by address."""
        address = self._section('.test_section_1').address
        self.assertEqual(self._elf.read_value(address), b'You cannot pass')

        int32_address = self._section('.test_section_2').address
        self.assertEqual(
            self._elf.read_value(int32_address, 4), b'\xef\xbe\xed\xfe'
        )

    def test_read_string(self) -> None:
        """read_c_string stops at a null byte or at the end of the data."""
        bytes_io = io.BytesIO(
            b'This is a null-terminated string\0No terminator!'
        )
        self.assertEqual(
            elf_reader.read_c_string(bytes_io),
            b'This is a null-terminated string',
        )
        self.assertEqual(elf_reader.read_c_string(bytes_io), b'No terminator!')
        self.assertEqual(elf_reader.read_c_string(bytes_io), b'')

    def test_compatible_file_for_elf(self) -> None:
        """An ELF file, or just the ELF magic bytes, is compatible."""
        self.assertTrue(elf_reader.compatible_file(self._elf_file))
        self.assertTrue(elf_reader.compatible_file(io.BytesIO(b'\x7fELF')))

    def test_compatible_file_for_elf_start_at_offset(self) -> None:
        """compatible_file leaves the caller's file position untouched."""
        self._elf_file.seek(13)  # Seek ahead to get out of sync
        self.assertTrue(elf_reader.compatible_file(self._elf_file))
        self.assertEqual(13, self._elf_file.tell())

    def test_compatible_file_for_invalid_elf(self) -> None:
        """Data that does not start with the ELF magic is not compatible."""
        self.assertFalse(elf_reader.compatible_file(io.BytesIO(b'\x7fELVESF')))
188
189
190def _archive_file(data: bytes) -> bytes:
191    return (
192        'FILE ID 90123456'
193        'MODIFIED 012'
194        'OWNER '
195        'GROUP '
196        'MODE 678'
197        f'{len(data):10}'  # File size -- the only part that's needed.
198        '`\n'.encode() + data
199    )
200
201
class ArchiveTest(unittest.TestCase):
    """Tests reading from archive files.

    The archives are built in memory: the archive magic followed by entries
    produced with _archive_file().
    """

    def setUp(self) -> None:
        super().setUp()

        with open(TEST_ELF_PATH, 'rb') as fd:
            self._elf_data = fd.read()

        self._archive_entries = b'blah', b'hello', self._elf_data

        self._archive_data = elf_reader.ARCHIVE_MAGIC + b''.join(
            _archive_file(f) for f in self._archive_entries
        )
        self._archive = io.BytesIO(self._archive_data)

    @staticmethod
    def _read_files(archive: io.BytesIO) -> list:
        """Returns the contents of every entry files_in_archive() reports.

        Each entry is read as soon as its size is yielded, mirroring how a
        plain for loop over files_in_archive() would consume the archive.
        """
        return [
            archive.read(size) for size in elf_reader.files_in_archive(archive)
        ]

    def test_compatible_file_for_archive(self) -> None:
        """The archive magic alone, or a full archive, is compatible."""
        self.assertTrue(elf_reader.compatible_file(io.BytesIO(b'!<arch>\n')))
        self.assertTrue(elf_reader.compatible_file(self._archive))

    def test_compatible_file_for_invalid_archive(self) -> None:
        """A truncated archive magic is not compatible."""
        self.assertFalse(elf_reader.compatible_file(io.BytesIO(b'!<arch>')))

    def test_iterate_over_files(self) -> None:
        """Every entry is reported, in order, with the right contents."""
        # Compare complete lists rather than zip()-ing with the expected
        # entries: zip() stops at the shorter input, so missing or extra
        # entries would go unnoticed.
        self.assertEqual(
            list(self._archive_entries), self._read_files(self._archive)
        )

    def test_iterate_over_empty_archive(self) -> None:
        """An archive with only the magic yields no entries."""
        with self.assertRaises(StopIteration):
            next(iter(elf_reader.files_in_archive(io.BytesIO(b'!<arch>\n'))))

    def test_iterate_over_invalid_archive(self) -> None:
        """Iterating data with a bad archive magic raises FileDecodeError."""
        with self.assertRaises(elf_reader.FileDecodeError):
            for _ in elf_reader.files_in_archive(
                io.BytesIO(b'!<arch>blah blahblah')
            ):
                pass

    def test_extra_newline_after_entry_is_ignored(self) -> None:
        """A single newline between entries does not disturb parsing."""
        archive = io.BytesIO(
            elf_reader.ARCHIVE_MAGIC
            + _archive_file(self._elf_data)
            + b'\n'
            + _archive_file(self._elf_data)
        )

        # Check the full list so the test cannot pass vacuously when no (or
        # only one) entry is yielded.
        self.assertEqual(
            [self._elf_data, self._elf_data], self._read_files(archive)
        )

    def test_two_extra_newlines_parsing_fails(self) -> None:
        """Two newlines between entries make parsing fail."""
        archive = io.BytesIO(
            elf_reader.ARCHIVE_MAGIC
            + _archive_file(self._elf_data)
            + b'\n\n'
            + _archive_file(self._elf_data)
        )

        with self.assertRaises(elf_reader.FileDecodeError):
            for size in elf_reader.files_in_archive(archive):
                self.assertEqual(self._elf_data, archive.read(size))

    def test_iterate_over_archive_with_invalid_size(self) -> None:
        """A non-decimal size field raises FileDecodeError."""
        data = elf_reader.ARCHIVE_MAGIC + _archive_file(b'$' * 3210)

        # Iterate over the file normally; exactly one entry is expected.
        self.assertEqual([b'$' * 3210], self._read_files(io.BytesIO(data)))

        # Replace the size with a hex number, which is not valid.
        with self.assertRaises(elf_reader.FileDecodeError):
            for _ in elf_reader.files_in_archive(
                io.BytesIO(data.replace(b'3210', b'0x99'))
            ):
                pass

    def test_elf_reader_dump_single_section(self) -> None:
        """elf_reader.Elf can dump sections of an ELF inside an archive."""
        elf = elf_reader.Elf(self._archive)
        self.assertEqual(
            elf.dump_section_contents(r'\.test_section_1'), b'You cannot pass\0'
        )
        self.assertEqual(
            elf.dump_section_contents(r'\.test_section_2'), b'\xef\xbe\xed\xfe'
        )

    def test_elf_reader_read_values(self) -> None:
        """elf_reader.Elf can read values of an ELF inside an archive."""
        elf = elf_reader.Elf(self._archive)
        address = next(iter(elf.sections_with_name('.test_section_1'))).address
        self.assertEqual(elf.read_value(address), b'You cannot pass')

        int32_address = next(
            iter(elf.sections_with_name('.test_section_2'))
        ).address
        self.assertEqual(elf.read_value(int32_address, 4), b'\xef\xbe\xed\xfe')

    def test_elf_reader_duplicate_sections_are_concatenated(self) -> None:
        """Same-named sections from several archive entries are joined."""
        archive_data = elf_reader.ARCHIVE_MAGIC + b''.join(
            _archive_file(f) for f in [self._elf_data, self._elf_data]
        )
        elf = elf_reader.Elf(io.BytesIO(archive_data))

        self.assertEqual(
            elf.dump_section_contents(r'\.test_section_1'),
            b'You cannot pass\0You cannot pass\0',
        )
        self.assertEqual(
            elf.dump_section_contents(r'\.test_section_2'),
            b'\xef\xbe\xed\xfe' * 2,
        )
313
314
# Run every test in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
317