# Copyright 2021 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""A symbolizer based on llvm-symbolizer."""

import shutil
import subprocess
import threading
import json
from pathlib import Path
from pw_symbolizer import symbolizer

# If the script is being run through Bazel, our support binaries are provided
# at well known locations in its runfiles.
try:
    from python.runfiles import runfiles  # type: ignore

    r = runfiles.Create()
    _LLVM_SYMBOLIZER = r.Rlocation(
        'pigweed/pw_symbolizer/py/llvm-symbolizer-copy'
    )
except ImportError:
    _LLVM_SYMBOLIZER = 'llvm-symbolizer'


class LlvmSymbolizer(symbolizer.Symbolizer):
    """A symbolizer that wraps llvm-symbolizer.

    When constructed with a binary, a single llvm-symbolizer subprocess is
    started and kept alive; each call to symbolize() writes one address to its
    stdin and reads one result back from its stdout. Call close() to terminate
    the subprocess.
    """

    def __init__(
        self,
        binary: Path | None = None,
        force_legacy: bool = False,
        llvm_symbolizer_binary: Path | None = None,
    ):
        """Locates llvm-symbolizer and, if a binary is given, launches it.

        Args:
          binary: The file to symbolize addresses against (passed to
            llvm-symbolizer via --exe). If None, no subprocess is started and
            symbolize() returns empty symbols.
          force_legacy: If True, always use the plain-text 'LLVM' output style
            rather than probing for JSON support.
          llvm_symbolizer_binary: Explicit llvm-symbolizer executable to use
            instead of the module-level default.

        Raises:
          FileNotFoundError: If llvm-symbolizer cannot be found, or if binary
            is given but does not exist.
        """
        # Lets destructor return cleanly if the binary is not found.
        self._symbolizer = None
        if llvm_symbolizer_binary:
            self._symbolizer_binary = str(llvm_symbolizer_binary)
        else:
            self._symbolizer_binary = _LLVM_SYMBOLIZER
            if shutil.which(self._symbolizer_binary) is None:
                raise FileNotFoundError(
                    'llvm-symbolizer not installed. Run bootstrap, or download '
                    'LLVM (https://github.com/llvm/llvm-project/releases/) and '
                    'add the tools to your system PATH'
                )

        # Prefer JSON output as it's easier to decode.
        if force_legacy:
            self._json_mode = False
        else:
            self._json_mode = LlvmSymbolizer._is_json_compatibile(
                self._symbolizer_binary
            )

        if binary is not None:
            if not binary.exists():
                raise FileNotFoundError(binary)

            output_style = 'JSON' if self._json_mode else 'LLVM'
            cmd = [
                self._symbolizer_binary,
                '--no-inlines',
                '--demangle',
                '--functions',
                f'--output-style={output_style}',
                '--exe',
                str(binary),
            ]
            self._symbolizer = subprocess.Popen(
                cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE
            )

        # Serializes the write-request/read-response exchange in symbolize().
        self._lock: threading.Lock = threading.Lock()

    def __del__(self):
        self.close()

    def close(self):
        """Closes the active llvm-symbolizer process.

        Safe to call more than once, and safe to call on an instance whose
        __init__ did not run to completion.
        """
        # getattr: __del__ can run on an instance that never got as far as
        # assigning _symbolizer.
        process = getattr(self, '_symbolizer', None)
        if process is None:
            return

        # Detach first so a failure below cannot leave a dead process handle
        # behind for the next close()/__del__ to trip over.
        self._symbolizer = None

        process.terminate()
        process.wait()
        try:
            if process.stdin is not None:
                try:
                    process.stdin.close()
                except BrokenPipeError:
                    # Closing flushes any buffered request. The child has
                    # already exited, so that data is undeliverable anyway.
                    pass
        finally:
            if process.stdout is not None:
                process.stdout.close()

    @staticmethod
    def _is_json_compatibile(symbolizer_binary: str) -> bool:
        """Checks llvm-symbolizer to ensure compatibility.

        Runs `<symbolizer_binary> --help` and reports whether any line of the
        help text mentions both '--output-style' and 'JSON'.
        """
        result = subprocess.run(
            (symbolizer_binary, '--help'),
            stdout=subprocess.PIPE,
            stdin=subprocess.PIPE,
        )
        return any(
            '--output-style' in line and 'JSON' in line
            for line in result.stdout.decode().splitlines()
        )

    @staticmethod
    def _read_json_symbol(address, stdout) -> symbolizer.Symbol:
        """Reads a single symbol from llvm-symbolizer's JSON output mode.

        Args:
          address: The address that was requested; stored in the result.
          stdout: Binary stream from which one line of JSON is read.

        Returns:
          The first symbol in the response, or an empty symbol for address if
          the response carries no symbols.

        Raises:
          ValueError: If the stream is at end-of-file, or (as
            json.JSONDecodeError) if the line is not valid JSON.
        """
        response = stdout.readline()
        if not response:
            # EOF: the process went away between the request and the reply.
            raise ValueError('llvm-symbolizer closed unexpectedly')

        results = json.loads(response.decode())

        # A response without symbols (e.g. an error object) is treated like an
        # unknown address in the LLVM output mode: an empty symbol.
        symbols = results.get('Symbol')
        if not symbols:
            return symbolizer.Symbol(address)

        # Get the first symbol.
        symbol = symbols[0]

        return symbolizer.Symbol(
            address=address,
            name=symbol['FunctionName'],
            file=symbol['FileName'],
            line=symbol['Line'],
        )

    @staticmethod
    def _llvm_output_line_splitter(file_and_line: str) -> tuple[str, int]:
        """Splits a 'path/to/src.c:123:1' string into (file, line number).

        Returns:
          (file, line); ('', 0) when the file name starts with '?'.

        Raises:
          ValueError: If the string does not have at least three
            colon-separated fields, or the line-number field is empty or not
            an integer.
        """
        # LLVM file name output is as follows:
        #   path/to/src.c:123:1
        # The last field (the column, per the llvm-symbolizer documentation)
        # is ignored, the second to last is the line number, and all leading
        # characters are the file name. Splitting from the right keeps any
        # colons inside the file name intact.
        try:
            file, line_number_str, _ = file_and_line.rsplit(':', 2)
        except ValueError:
            raise ValueError(
                f'Bad symbol format: {file_and_line}'
            ) from None

        if not line_number_str:
            raise ValueError(f'Bad symbol format: {file_and_line}')

        # For unknown file names, mark as blank.
        if file.startswith('?'):
            return ('', 0)

        return (file, int(line_number_str))

    @staticmethod
    def _read_llvm_symbol(address, stdout) -> symbolizer.Symbol:
        """Reads a single symbol from llvm-symbolizer's LLVM output mode.

        Args:
          address: The address that was requested; stored in the result.
          stdout: Binary stream positioned at the start of one result record.

        Returns:
          The symbol described by the first two lines of the record, or an
          empty symbol for address if the function name starts with '?'.

        Raises:
          ValueError: If the stream is at end-of-file, or the file/line text
            is malformed.
        """
        first_line = stdout.readline()
        if not first_line:
            # EOF: the process went away between the request and the reply.
            raise ValueError('llvm-symbolizer closed unexpectedly')

        symbol = first_line.decode().strip()
        file_and_line = stdout.readline().decode().strip()

        # Might have gotten multiple symbol matches, drop all of the other ones.
        # The results of a symbol are denoted by an empty newline. Also stop at
        # end-of-file (readline() returns b'') so a dead process cannot spin
        # this loop forever.
        while True:
            extra = stdout.readline()
            if not extra or not extra.strip():
                break

        if symbol.startswith('?'):
            return symbolizer.Symbol(address)

        file, line_number = LlvmSymbolizer._llvm_output_line_splitter(
            file_and_line
        )

        return symbolizer.Symbol(address, symbol, file, line_number)

    def symbolize(self, address: int) -> symbolizer.Symbol:
        """Symbolizes an address using the loaded ELF file.

        Args:
          address: The address to look up.

        Returns:
          The resolved symbol, or an empty symbol if no llvm-symbolizer
          process is active.

        Raises:
          ValueError: If the llvm-symbolizer process has exited.
        """
        if self._symbolizer is None:
            return symbolizer.Symbol(address=address, name='', file='', line=0)

        with self._lock:
            # poll() refreshes the exit status; the returncode attribute alone
            # stays None until something calls poll() or wait().
            if self._symbolizer.poll() is not None:
                raise ValueError('llvm-symbolizer closed unexpectedly')

            stdin = self._symbolizer.stdin
            stdout = self._symbolizer.stdout

            assert stdin is not None
            assert stdout is not None

            stdin.write(f'0x{address:08X}\n'.encode())
            stdin.flush()

            if self._json_mode:
                return LlvmSymbolizer._read_json_symbol(address, stdout)

            return LlvmSymbolizer._read_llvm_symbol(address, stdout)