xref: /aosp_15_r20/bionic/benchmarks/linker_relocation/regen/dump_relocs.py (revision 8d67ca893c1523eb926b9080dbe4e2ffd2a27ba1)
1*8d67ca89SAndroid Build Coastguard Worker#!/usr/bin/env python3
2*8d67ca89SAndroid Build Coastguard Worker#
3*8d67ca89SAndroid Build Coastguard Worker# Copyright (C) 2019 The Android Open Source Project
4*8d67ca89SAndroid Build Coastguard Worker# All rights reserved.
5*8d67ca89SAndroid Build Coastguard Worker#
6*8d67ca89SAndroid Build Coastguard Worker# Redistribution and use in source and binary forms, with or without
7*8d67ca89SAndroid Build Coastguard Worker# modification, are permitted provided that the following conditions
8*8d67ca89SAndroid Build Coastguard Worker# are met:
9*8d67ca89SAndroid Build Coastguard Worker#  * Redistributions of source code must retain the above copyright
10*8d67ca89SAndroid Build Coastguard Worker#    notice, this list of conditions and the following disclaimer.
11*8d67ca89SAndroid Build Coastguard Worker#  * Redistributions in binary form must reproduce the above copyright
12*8d67ca89SAndroid Build Coastguard Worker#    notice, this list of conditions and the following disclaimer in
13*8d67ca89SAndroid Build Coastguard Worker#    the documentation and/or other materials provided with the
14*8d67ca89SAndroid Build Coastguard Worker#    distribution.
15*8d67ca89SAndroid Build Coastguard Worker#
16*8d67ca89SAndroid Build Coastguard Worker# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17*8d67ca89SAndroid Build Coastguard Worker# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18*8d67ca89SAndroid Build Coastguard Worker# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
19*8d67ca89SAndroid Build Coastguard Worker# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
20*8d67ca89SAndroid Build Coastguard Worker# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21*8d67ca89SAndroid Build Coastguard Worker# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
22*8d67ca89SAndroid Build Coastguard Worker# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
23*8d67ca89SAndroid Build Coastguard Worker# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24*8d67ca89SAndroid Build Coastguard Worker# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25*8d67ca89SAndroid Build Coastguard Worker# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
26*8d67ca89SAndroid Build Coastguard Worker# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27*8d67ca89SAndroid Build Coastguard Worker# SUCH DAMAGE.
28*8d67ca89SAndroid Build Coastguard Worker
29*8d67ca89SAndroid Build Coastguard Worker# Scan an ELF file and its tree of DT_NEEDED ELF files, and dump out a JSON file listing:
30*8d67ca89SAndroid Build Coastguard Worker#  - each ELF file
31*8d67ca89SAndroid Build Coastguard Worker#  - its DT_NEEDED entries
32*8d67ca89SAndroid Build Coastguard Worker#  - its defined symbols
33*8d67ca89SAndroid Build Coastguard Worker#  - its relocations
34*8d67ca89SAndroid Build Coastguard Worker
35*8d67ca89SAndroid Build Coastguard Workerimport argparse
36*8d67ca89SAndroid Build Coastguard Workerimport json
37*8d67ca89SAndroid Build Coastguard Workerimport os
38*8d67ca89SAndroid Build Coastguard Workerimport re
39*8d67ca89SAndroid Build Coastguard Workerimport shlex
40*8d67ca89SAndroid Build Coastguard Workerimport shutil
41*8d67ca89SAndroid Build Coastguard Workerimport subprocess
42*8d67ca89SAndroid Build Coastguard Workerimport sys
43*8d67ca89SAndroid Build Coastguard Workerimport tempfile
44*8d67ca89SAndroid Build Coastguard Workerimport textwrap
45*8d67ca89SAndroid Build Coastguard Workerimport typing
46*8d67ca89SAndroid Build Coastguard Workerfrom enum import Enum
47*8d67ca89SAndroid Build Coastguard Workerfrom typing import Any, Set, List, Dict, Optional
48*8d67ca89SAndroid Build Coastguard Workerfrom subprocess import PIPE, DEVNULL
49*8d67ca89SAndroid Build Coastguard Workerfrom pathlib import Path
50*8d67ca89SAndroid Build Coastguard Worker
51*8d67ca89SAndroid Build Coastguard Workerfrom common_types import LoadedLibrary, SymBind, SymKind, DynSymbol, DynSymbols, Relocations, \
52*8d67ca89SAndroid Build Coastguard Worker    SymbolRef, bfs_walk, elf_tree_to_json
53*8d67ca89SAndroid Build Coastguard Worker
54*8d67ca89SAndroid Build Coastguard Worker
# Memoizes the decoded stdout of each llvm-readelf invocation, keyed by repr() of its command line.
g_readelf_cache: Dict[str, str] = {}
# Memoizes the soname that get_elf_soname() resolved for each ELF path.
g_path_to_soname_cache: Dict[Path, str] = {}
57*8d67ca89SAndroid Build Coastguard Worker
def do_readelf_query(arguments: List[str]) -> List[str]:
    """Run llvm-readelf with `arguments` and return its stdout as a list of lines.

    The decoded output is memoized in g_readelf_cache, keyed by the repr of the full command
    line, so repeating a query does not spawn another process. Because the subprocess is run
    with check=True, a non-zero exit status raises subprocess.CalledProcessError.
    """
    command = ['llvm-readelf', *arguments]
    cache_key = repr(command)

    text = g_readelf_cache.get(cache_key)
    if text is None:
        completed = subprocess.run(command, check=True, stdout=PIPE)
        text = completed.stdout.decode()
        g_readelf_cache[cache_key] = text

    return text.splitlines()
65*8d67ca89SAndroid Build Coastguard Worker
66*8d67ca89SAndroid Build Coastguard Worker
def get_elf_soname(path: Path) -> str:
    """Return the soname of the ELF file at `path`.

    The soname is taken from the first (SONAME) entry printed by `llvm-readelf -d`. When there
    is no such entry, the basename of `path` is used instead. The answer is memoized per path
    in g_path_to_soname_cache.
    """
    try:
        return g_path_to_soname_cache[path]
    except KeyError:
        pass

    soname_re = re.compile(r'\(SONAME\)\s+Library soname: \[(.+)\]$')
    attempts = (soname_re.search(row) for row in do_readelf_query(['-d', str(path)]))
    first_hit = next((hit for hit in attempts if hit), None)

    soname = first_hit.group(1) if first_hit else os.path.basename(path)
    g_path_to_soname_cache[path] = soname
    return soname
79*8d67ca89SAndroid Build Coastguard Worker
80*8d67ca89SAndroid Build Coastguard Worker
def get_elf_needed(path: Path) -> List[str]:
    """Return the library names from the (NEEDED) entries of `llvm-readelf -d`, in output order."""
    needed_re = re.compile(r'\(NEEDED\)\s+Shared library: \[(.+)\]$')
    hits = map(needed_re.search, do_readelf_query(['-d', str(path)]))
    return [hit.group(1) for hit in hits if hit]
89*8d67ca89SAndroid Build Coastguard Worker
90*8d67ca89SAndroid Build Coastguard Worker
# Matches one row of `llvm-readelf --dyn-syms` output. Only rows whose type is one of
# FUNC/IFUNC/OBJECT/NOTYPE and whose binding is GLOBAL or WEAK match.
# Capture groups, in order: symbol number, type, bind, ndx (a section number or 'UND'), name,
# version separator ('@' or '@@', None if unversioned), version name (None if unversioned).
kSymbolMatcher = re.compile(r'''
    \s+ (\d+) : \s*                 # number
    [0-9a-f]+ \s+                   # value
    [0-9a-f]+ \s+                   # size
    (FUNC|IFUNC|OBJECT|NOTYPE) \s+  # type
    (GLOBAL|WEAK) \s+               # bind
    \w+ \s+                         # vis
    (\d+|UND) \s+                   # ndx
    ([\.\w]+)                       # name
    (?:(@@?)(\w+))?                 # version
    $
''', re.VERBOSE)
103*8d67ca89SAndroid Build Coastguard Worker
104*8d67ca89SAndroid Build Coastguard Worker
def get_dyn_symbols(path: Path) -> DynSymbols:
    """Parse `llvm-readelf --dyn-syms` output for `path` into a map of symbol number -> DynSymbol.

    Rows that kSymbolMatcher does not recognize are skipped, and so is the __cfi_check symbol.
    Exits with an error message if the output looks like it came from an obsolete llvm-readelf.
    """
    kinds = {
        'FUNC': SymKind.Func,
        'IFUNC': SymKind.Func,
        'OBJECT': SymKind.Var,
        'NOTYPE': SymKind.Func,
    }
    binds = {'GLOBAL': SymBind.Global, 'WEAK': SymBind.Weak}

    symbols = {}
    for row in do_readelf_query(['--dyn-syms', str(path)]):
        parsed = kSymbolMatcher.match(row)
        if parsed is None:
            # gLinux currently has a version of llvm-readelf whose output is very different from
            # the current versions of llvm-readelf (or GNU readelf).
            if 'Symbol table of .gnu.hash for image:' in row:
                sys.exit(f'error: obsolete version of llvm-readelf')
            continue

        index, sym_type, sym_bind, section, sym_name, ver_type, ver_name = parsed.groups()

        # The linker gives an error like:
        #    CANNOT LINK EXECUTABLE "/data/local/tmp/out-linker-bench/b_libandroid_servers": unaligned __cfi_check in the library "(null)"
        # I am probably breaking some kind of CFI invariant, so strip these out for now.
        if sym_name == '__cfi_check':
            continue

        not_undefined = section != 'UND'
        symbols[int(index)] = DynSymbol(sym_name, kinds[sym_type], binds[sym_bind],
                                        not_undefined, ver_type, ver_name)

    return symbols
137*8d67ca89SAndroid Build Coastguard Worker
138*8d67ca89SAndroid Build Coastguard Worker
# Matches one row of `llvm-readelf -r` output.
# Capture groups, in order: offset (hex), info (hex), relocation type name, symbol name,
# symbol version. The symbol value/name/version part is optional, so the last two groups are
# None for rows that carry no symbol.
kRelocationMatcher = re.compile(r'''
    ([0-9a-f]+) \s+     # offset
    ([0-9a-f]+) \s+     # info
    (\w+)               # type
    (?:
        \s+ [0-9a-f]+ \s+       # symbol value
        ([\.\w]+)               # symbol name
        (?: @@? ([\.\w]+) )?    # version
    )?
    \b
''', re.VERBOSE)
150*8d67ca89SAndroid Build Coastguard Worker
151*8d67ca89SAndroid Build Coastguard Worker
def scan_relocations(path: Path, syms: DynSymbols) -> Relocations:
    """Collect the relocations of the ELF file at `path`, grouped by category.

    Args:
        path: ELF file to inspect with `llvm-readelf -r`.
        syms: dynamic symbols of the same file, keyed by symbol number (as built by
            get_dyn_symbols). Used to look up whether a referenced symbol is weak.

    Returns:
        A Relocations object with entries appended to its `relative`, `jump_slots`, `got` and
        `symbolic` lists. Offsets are stored divided by 4 (8-hex-digit offsets) or by 8
        (16-hex-digit offsets).

    Every row that matches kRelocationMatcher but cannot be interpreted terminates the program
    via sys.exit() with an 'error: ...' message. Rows that do not match are ignored, and
    R_ARM_IRELATIVE rows are skipped.
    """
    result: Relocations = Relocations()
    for line in do_readelf_query(['-r', str(path)]):
        m = kRelocationMatcher.match(line)
        if not m:
            continue

        offset_str, info_str, reloc_name, sym_name, ver = m.groups()

        # The printed width of the offset selects the layout: 8 hex digits -> 4-byte units and
        # the symbol index is info >> 8; 16 hex digits -> 8-byte units and info >> 32.
        if len(offset_str) == 8:
            offset = int(offset_str, 16) // 4
            sym_idx = int(info_str, 16) >> 8
        elif len(offset_str) == 16:
            offset = int(offset_str, 16) // 8
            sym_idx = int(info_str, 16) >> 32
        else:
            sys.exit(f'error: invalid offset length: {repr(offset_str)}')

        # TODO: R_ARM_IRELATIVE doesn't work, so skip it.
        if reloc_name == 'R_ARM_IRELATIVE':
            continue

        if reloc_name in ['R_ARM_RELATIVE', 'R_AARCH64_RELATIVE']:
            # Explicit check rather than `assert`: this validates tool output and must not
            # disappear when Python runs with -O.
            if sym_name is not None:
                sys.exit(f'error: unexpected symbol for reloc {m.groups()} in {path}')
            result.relative.append(offset)
            continue

        if sym_name is None:
            sys.exit(f'error: missing symbol for reloc {m.groups()} in {path}')

        # get_dyn_symbols() skips some rows, so the referenced index may be absent; report that
        # in the same style as the other failures instead of dying with a bare KeyError.
        if sym_idx not in syms:
            sys.exit(f'error: unknown symbol index {sym_idx} for reloc {m.groups()} in {path}')

        is_weak = syms[sym_idx].bind == SymBind.Weak
        symbol = SymbolRef(sym_name, is_weak, ver)

        if reloc_name in ['R_ARM_JUMP_SLOT', 'R_AARCH64_JUMP_SLOT']:
            result.jump_slots.append(symbol)
        elif reloc_name in ['R_ARM_GLOB_DAT', 'R_AARCH64_GLOB_DAT']:
            result.got.append(symbol)
        elif reloc_name in ['R_ARM_ABS32', 'R_AARCH64_ABS64']:
            result.symbolic.append((offset, symbol))
        else:
            sys.exit(f'error: unrecognized reloc {m.groups()} in {path}')

    return result
193*8d67ca89SAndroid Build Coastguard Worker
194*8d67ca89SAndroid Build Coastguard Worker
def load_elf_tree(search_path: List[Path], path: Path) -> LoadedLibrary:
    """Load the ELF file at `path` plus, recursively, every library it lists as DT_NEEDED.

    Needed names are looked up in the directories of `search_path`, in order; the first existing
    candidate file is loaded. A library that was already loaded under the requested name is
    reused. Exits with an error message when a needed library cannot be found, or when a file
    about to be loaded has a soname that is already registered.

    Returns the LoadedLibrary for `path`, with its dependencies reachable through `needed`.
    """
    loaded_by_soname: Dict[str, LoadedLibrary] = {}

    def load(elf_path: Path) -> LoadedLibrary:
        lib = LoadedLibrary()
        lib.soname = get_elf_soname(elf_path)
        if lib.soname in loaded_by_soname:
            sys.exit(f'soname already loaded: {lib.soname}')
        # Register before descending into dependencies so later lookups of this soname reuse it.
        loaded_by_soname[lib.soname] = lib

        lib.syms = get_dyn_symbols(elf_path)
        lib.rels = scan_relocations(elf_path, lib.syms)

        for needed_name in get_elf_needed(elf_path):
            dependency = find_library(needed_name)
            if dependency is not None:
                lib.needed.append(dependency)

        return lib

    def find_library(needed: str) -> Optional[LoadedLibrary]:
        already_loaded = loaded_by_soname.get(needed)
        if already_loaded is not None:
            return already_loaded

        candidates = (directory / needed for directory in search_path)
        found = next((candidate for candidate in candidates if candidate.exists()), None)
        if found is None:
            sys.exit(f'error: missing DT_NEEDED lib {needed}!')
        return load(found)

    return load(path)
230*8d67ca89SAndroid Build Coastguard Worker
231*8d67ca89SAndroid Build Coastguard Worker
def main() -> None:
    """Command-line entry point: scan the ELF tree rooted at `input` and write it as JSON.

    Positional arguments are the input ELF file and the output JSON path. Each -L PATH option
    adds a directory that is searched for DT_NEEDED libraries.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input', type=str)
    parser.add_argument('output', type=str)
    parser.add_argument('-L', dest='search_path', metavar='PATH', action='append', type=str, default=[])

    args = parser.parse_args()
    search_path = [Path(p) for p in args.search_path]

    # Build the whole tree before opening the output: opening with 'w' truncates the file, so a
    # failure while scanning would otherwise leave an empty output file behind.
    root = load_elf_tree(search_path, Path(args.input))
    tree = elf_tree_to_json(root)

    with open(Path(args.output), 'w') as f:
        json.dump(tree, f, sort_keys=True, indent=2)
244*8d67ca89SAndroid Build Coastguard Worker
245*8d67ca89SAndroid Build Coastguard Worker
# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
248