xref: /aosp_15_r20/external/pigweed/pw_symbolizer/py/pw_symbolizer/llvm_symbolizer.py (revision 61c4878ac05f98d0ceed94b57d316916de578985)
1# Copyright 2021 The Pigweed Authors
2#
3# Licensed under the Apache License, Version 2.0 (the "License"); you may not
4# use this file except in compliance with the License. You may obtain a copy of
5# the License at
6#
7#     https://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12# License for the specific language governing permissions and limitations under
13# the License.
14"""A symbolizer based on llvm-symbolizer."""
15
16import shutil
17import subprocess
18import threading
19import json
20from pathlib import Path
21from pw_symbolizer import symbolizer
22
# If the script is being run through Bazel, our support binaries are provided
# at well known locations in its runfiles. Outside of Bazel the runfiles
# library cannot be imported, so llvm-symbolizer is looked up by name on PATH.
try:
    # Only the import is guarded: an ImportError raised by anything else must
    # not be mistaken for "not running under Bazel".
    from python.runfiles import runfiles  # type: ignore
except ImportError:
    _LLVM_SYMBOLIZER = 'llvm-symbolizer'
else:
    r = runfiles.Create()
    # NOTE(review): the runfiles library types both Create() and Rlocation()
    # as Optional. Fall back to the PATH lookup name rather than storing None,
    # which would later fail with an unrelated TypeError in shutil.which().
    _LLVM_SYMBOLIZER = (
        r.Rlocation('pigweed/pw_symbolizer/py/llvm-symbolizer-copy')
        if r is not None
        else None
    ) or 'llvm-symbolizer'
34
35
class LlvmSymbolizer(symbolizer.Symbolizer):
    """A symbolizer that wraps llvm-symbolizer.

    When constructed with a ``binary``, one llvm-symbolizer subprocess is
    started and kept alive; each symbolize() call writes an address to its
    stdin and reads the answer from its stdout. Without a ``binary`` no
    subprocess is started and symbolize() returns empty symbols.
    """

    def __init__(
        self,
        binary: Path | None = None,
        force_legacy: bool = False,
        llvm_symbolizer_binary: Path | None = None,
    ):
        """Locates llvm-symbolizer and, if a binary is given, launches it.

        Args:
            binary: File passed to llvm-symbolizer via ``--exe``. If None, no
                subprocess is started.
            force_legacy: If True, always use the line-based LLVM output style
                instead of probing for JSON support.
            llvm_symbolizer_binary: Explicit llvm-symbolizer executable. If
                not provided, the module-level default is used and must be
                resolvable with shutil.which().

        Raises:
            FileNotFoundError: The default llvm-symbolizer could not be found,
                or ``binary`` does not exist.
        """
        # Set before anything can fail so close() (and therefore __del__)
        # returns cleanly if the binary is not found.
        self._symbolizer = None
        # Always present, so symbolize() never depends on which constructor
        # branch ran.
        self._lock: threading.Lock = threading.Lock()

        if llvm_symbolizer_binary:
            self._symbolizer_binary = str(llvm_symbolizer_binary)
        else:
            self._symbolizer_binary = _LLVM_SYMBOLIZER
            if shutil.which(self._symbolizer_binary) is None:
                raise FileNotFoundError(
                    'llvm-symbolizer not installed. Run bootstrap, or download '
                    'LLVM (https://github.com/llvm/llvm-project/releases/) and '
                    'add the tools to your system PATH'
                )

        # Prefer JSON output as it's easier to decode.
        if force_legacy:
            self._json_mode = False
        else:
            self._json_mode = LlvmSymbolizer._is_json_compatibile(
                self._symbolizer_binary
            )

        if binary is not None:
            if not binary.exists():
                raise FileNotFoundError(binary)

            output_style = 'JSON' if self._json_mode else 'LLVM'
            cmd = [
                self._symbolizer_binary,
                '--no-inlines',
                '--demangle',
                '--functions',
                f'--output-style={output_style}',
                '--exe',
                str(binary),
            ]
            self._symbolizer = subprocess.Popen(
                cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE
            )

    def __del__(self):
        self.close()

    def close(self):
        """Closes the active llvm-symbolizer process.

        Does nothing if no process is active, so it is safe to call more than
        once.
        """
        # getattr() keeps __del__ quiet on instances whose __init__ never ran.
        process = getattr(self, '_symbolizer', None)
        if process is None:
            return

        # Drop the reference first so a failure below cannot leave a
        # half-closed process behind for a later close() to trip over.
        self._symbolizer = None
        try:
            process.terminate()
            process.wait()
        finally:
            for pipe in (process.stdin, process.stdout):
                if pipe is None:
                    continue
                try:
                    pipe.close()
                except BrokenPipeError:
                    # Closing stdin flushes any buffered input; the child has
                    # already exited, so there is nothing left to deliver to.
                    pass

    @staticmethod
    def _is_json_compatibile(symbolizer_binary: str) -> bool:
        """Checks whether llvm-symbolizer advertises JSON output support.

        Runs ``<symbolizer_binary> --help`` and returns True if any line of
        its stdout mentions both ``--output-style`` and ``JSON``.
        """
        result = subprocess.run(
            (symbolizer_binary, '--help'),
            stdout=subprocess.PIPE,
            stdin=subprocess.PIPE,
        )
        return any(
            '--output-style' in line and 'JSON' in line
            for line in result.stdout.decode().splitlines()
        )

    @staticmethod
    def _read_json_symbol(address, stdout) -> symbolizer.Symbol:
        """Reads a single symbol from llvm-symbolizer's JSON output mode.

        Args:
            address: The address that was requested; stored in the result.
            stdout: Binary stream of the llvm-symbolizer process.

        Raises:
            ValueError: The stream ended, the line was not valid JSON, or the
                reply did not contain any symbol.
        """
        raw_line = stdout.readline()
        if not raw_line:
            # readline() returns b'' only at EOF, i.e. the process went away.
            raise ValueError('llvm-symbolizer closed unexpectedly')

        results = json.loads(raw_line.decode())

        # The symbol resolution should give us at least one symbol, even
        # if it's largely empty. Checked explicitly rather than with assert
        # so the validation survives `python -O`.
        symbols = results.get('Symbol')
        if not symbols:
            raise ValueError(
                f'llvm-symbolizer returned no symbol for 0x{address:08X}: '
                f'{results}'
            )

        # Get the first symbol.
        symbol = symbols[0]

        return symbolizer.Symbol(
            address=address,
            name=symbol['FunctionName'],
            file=symbol['FileName'],
            line=symbol['Line'],
        )

    @staticmethod
    def _llvm_output_line_splitter(file_and_line: str) -> tuple[str, int]:
        """Splits an LLVM-style source location into (file, line).

        Returns ('', 0) when the file name is unknown (starts with '?').

        Raises:
            ValueError: The location is not of the form ``file:line:column``.
        """
        # LLVM file name output is as follows:
        #   path/to/src.c:123:1
        # Where the last number is the column, the second to last the line
        # number, and all leading characters the file name (which may itself
        # contain ':' characters, hence splitting from the right). For now,
        # this class ignores the column.
        parts = file_and_line.rsplit(':', 2)
        if len(parts) != 3 or not parts[1]:
            raise ValueError(f'Bad symbol format: {file_and_line}')

        file, line_number_str, _ = parts

        # For unknown file names, mark as blank.
        if file.startswith('?'):
            return ('', 0)

        try:
            line_number = int(line_number_str)
        except ValueError:
            raise ValueError(f'Bad symbol format: {file_and_line}') from None

        return (file, line_number)

    @staticmethod
    def _read_llvm_symbol(address, stdout) -> symbolizer.Symbol:
        """Reads a single symbol from llvm-symbolizer's LLVM output mode.

        Args:
            address: The address that was requested; stored in the result.
            stdout: Binary stream of the llvm-symbolizer process.

        Raises:
            ValueError: The stream ended before a complete result was read,
                or the source location line was malformed.
        """
        symbol_line = stdout.readline()
        location_line = stdout.readline()
        if not symbol_line or not location_line:
            raise ValueError('llvm-symbolizer closed unexpectedly')

        symbol = symbol_line.decode().strip()
        file_and_line = location_line.decode().strip()

        # Might have gotten multiple symbol matches, drop all of the other ones.
        # The end of a result is denoted by an empty line. Any whitespace-only
        # line counts (covers '\r\n'), and EOF is an error rather than a
        # reason to keep reading forever.
        while True:
            extra = stdout.readline()
            if not extra:
                raise ValueError('llvm-symbolizer closed unexpectedly')
            if not extra.strip():
                break

        if symbol.startswith('?'):
            return symbolizer.Symbol(address)

        file, line_number = LlvmSymbolizer._llvm_output_line_splitter(
            file_and_line
        )

        return symbolizer.Symbol(address, symbol, file, line_number)

    def symbolize(self, address: int) -> symbolizer.Symbol:
        """Symbolizes an address using the loaded ELF file.

        Returns an empty symbol (blank name and file, line 0) when no
        llvm-symbolizer process is active.

        Raises:
            ValueError: The llvm-symbolizer process has exited, or its reply
                could not be parsed.
        """
        process = self._symbolizer
        if not process:
            return symbolizer.Symbol(address=address, name='', file='', line=0)

        with self._lock:
            # poll() is required here: Popen.returncode is only refreshed by
            # poll()/wait(), so reading the attribute alone would never
            # notice that the child has exited.
            if process.poll() is not None:
                raise ValueError('llvm-symbolizer closed unexpectedly')

            stdin = process.stdin
            stdout = process.stdout

            # Both pipes were requested in __init__; the asserts narrow the
            # Optional types for static checkers.
            assert stdin is not None
            assert stdout is not None

            try:
                stdin.write(f'0x{address:08X}\n'.encode())
                stdin.flush()
            except BrokenPipeError as err:
                # The child exited between the poll() above and the write.
                raise ValueError('llvm-symbolizer closed unexpectedly') from err

            if self._json_mode:
                return LlvmSymbolizer._read_json_symbol(address, stdout)

            return LlvmSymbolizer._read_llvm_symbol(address, stdout)
192