#!/usr/bin/env python3
#
# Copyright (C) 2013 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Module for looking up symbolic debugging information.

The information can include symbol names, offsets, and source locations.
"""

import atexit
import json
import glob
import os
import platform
import re
import shutil
import signal
import subprocess
import unittest

# Root of the Android source tree; falls back to the current directory.
ANDROID_BUILD_TOP = os.environ.get("ANDROID_BUILD_TOP", ".")


def FindClangDir():
    """Return the directory of the prebuilt clang used by the build.

    Runs build/soong/scripts/get_clang_version.py (relative to
    ANDROID_BUILD_TOP) and appends its output to the prebuilts path.

    Returns:
      The clang directory path, or None if get_clang_version.py does not exist.

    Raises:
      subprocess.CalledProcessError: if the script exists but exits non-zero.
    """
    get_clang_version = ANDROID_BUILD_TOP + "/build/soong/scripts/get_clang_version.py"
    if not os.path.exists(get_clang_version):
        return None
    # We want the script to fail if get_clang_version.py exists but is unable
    # to find the clang version, hence check_output().
    version_output = subprocess.check_output(get_clang_version, text=True)
    return ANDROID_BUILD_TOP + "/prebuilts/clang/host/linux-x86/" + version_output.strip()


def FindSymbolsDir():
    """Return the value of TARGET_OUT_UNSTRIPPED as reported by soong_ui.bash.

    The command is run with ANDROID_BUILD_TOP as its working directory. The
    working directory of this process is left untouched, and the child is
    waited for so that it does not linger as a zombie.

    Returns:
      The command's standard output with surrounding whitespace stripped. This
      is the empty string when the command printed nothing; the exit status is
      not checked.
    """
    cmd = "build/soong/soong_ui.bash --dumpvar-mode --abs TARGET_OUT_UNSTRIPPED"
    completed = subprocess.run(cmd,
                               shell=True,
                               cwd=ANDROID_BUILD_TOP,
                               stdout=subprocess.PIPE,
                               universal_newlines=True)
    return completed.stdout.strip()


# Computed once at import time.
SYMBOLS_DIR = FindSymbolsDir()

# None until SetBitness() has been called.
ARCH_IS_32BIT = None

VERBOSE = False

# These are private.
# Do not access them from other modules.
_CACHED_TOOLCHAIN = None
_CACHED_CXX_FILT = None

# Caches for symbolized information.
_SYMBOL_INFORMATION_ADDR2LINE_CACHE = {}
_SYMBOL_INFORMATION_OBJDUMP_CACHE = {}
_SYMBOL_DEMANGLING_CACHE = {}

# Caches for pipes to subprocesses.

class ProcessCache:
    """A bounded cache of running subprocesses, keyed by command line.

    At most _PIPE_MAX_OPEN processes are kept open; when a new one is needed
    beyond that limit, the least recently used process is terminated.
    """

    # Max number of open pipes.
    _PIPE_MAX_OPEN = 10

    def __init__(self):
        # State is per instance. As class attributes the command dictionary
        # would be shared by every ProcessCache object.
        self._cmd2pipe = {}  # tuple(cmd) -> Popen object
        self._lru = []       # [(tuple(cmd), Popen)], most recently used first

    def GetProcess(self, cmd):
        """Return a running process for cmd, starting one if necessary.

        Args:
          cmd: command line as a list of strings

        Returns:
          The subprocess.Popen object serving this command line.
        """
        cmd_tuple = tuple(cmd)  # Need to use a tuple as lists can't be dict keys.

        # Pipe already available?
        pipe = self._cmd2pipe.get(cmd_tuple)
        if pipe is not None:
            # Update LRU: move this entry to the front.
            others = [entry for entry in self._lru if entry[0] != cmd_tuple]
            self._lru = [(cmd_tuple, pipe)] + others
            return pipe

        # Not cached, yet. Check if too many are open, close the old ones.
        # The emptiness check keeps a limit of zero from popping an empty list.
        while self._lru and len(self._lru) >= self._PIPE_MAX_OPEN:
            open_cmd, open_pipe = self._lru.pop()
            del self._cmd2pipe[open_cmd]
            self.TerminateProcess(open_pipe)

        # Create and put into cache.
        pipe = self.SpawnProcess(cmd)
        self._cmd2pipe[cmd_tuple] = pipe
        self._lru = [(cmd_tuple, pipe)] + self._lru
        return pipe

    def SpawnProcess(self, cmd):
        """Start cmd with text-mode pipes attached to its stdin and stdout."""
        return subprocess.Popen(cmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                universal_newlines=True)

    def TerminateProcess(self, pipe):
        """Close the pipes of a process, terminate it and wait for it to exit."""
        pipe.stdin.close()
        pipe.stdout.close()
        pipe.terminate()
        pipe.wait()

    def KillAllProcesses(self):
        """Terminate every cached process and empty the cache."""
        open_pipes = [open_pipe for _, open_pipe in self._lru]
        # Reset the instance state (not local variables) before terminating, so
        # the cache can never hand out a process that has been shut down.
        self._cmd2pipe = {}
        self._lru = []
        for open_pipe in open_pipes:
            self.TerminateProcess(open_pipe)


_PIPE_ADDR2LINE_CACHE = ProcessCache()
_PIPE_CPPFILT_CACHE = ProcessCache()


# Process cache cleanup on shutdown.

def CloseAllPipes():
    """Terminate every subprocess held by the two module-level process caches."""
    _PIPE_ADDR2LINE_CACHE.KillAllProcesses()
    _PIPE_CPPFILT_CACHE.KillAllProcesses()


atexit.register(CloseAllPipes)


def PipeTermHandler(signum, frame):
    """Signal handler: close all cached pipes, then exit immediately.

    Args:
      signum: number of the received signal (unused)
      frame: interrupted stack frame (unused)
    """
    CloseAllPipes()
    os._exit(0)


for sig in (signal.SIGABRT, signal.SIGINT, signal.SIGTERM):
    signal.signal(sig, PipeTermHandler)


def ToolPath(tool, toolchain=None):
    """Return a fully-qualified path to the specified tool, or just the tool if it's on PATH.

    Args:
      tool: name of the tool binary
      toolchain: (optional) directory to look in; defaults to FindToolchain()

    Returns:
      tool unchanged if it is found on PATH, else toolchain joined with tool.
    """
    if shutil.which(tool):
        return tool
    if not toolchain:
        toolchain = FindToolchain()
    return os.path.join(toolchain, tool)


def FindToolchain():
    """Returns the toolchain.

    The result is cached in _CACHED_TOOLCHAIN; the directory is announced on
    stdout the first time it is found.

    Returns:
      Path of the llvm-binutils-stable directory below ANDROID_BUILD_TOP.

    Raises:
      Exception: if that directory does not exist.
    """
    global _CACHED_TOOLCHAIN
    if _CACHED_TOOLCHAIN:
        return _CACHED_TOOLCHAIN

    llvm_binutils_dir = ANDROID_BUILD_TOP + "/prebuilts/clang/host/linux-x86/llvm-binutils-stable/"
    if not os.path.exists(llvm_binutils_dir):
        raise Exception("Could not find llvm tool chain directory %s" % (llvm_binutils_dir))

    _CACHED_TOOLCHAIN = llvm_binutils_dir
    print("Using toolchain from:", _CACHED_TOOLCHAIN)
    return _CACHED_TOOLCHAIN


def SymbolInformation(lib, addr):
    """Look up symbol information about an address.

    Args:
      lib: library (or executable) pathname containing symbols
      addr: string hexadecimal address

    Returns:
      A list of the form [(source_symbol, source_location,
      object_symbol_with_offset)].

      If the function has been inlined then the list may contain
      more than one element with the symbols for the most deeply
      nested inlined location appearing first.  The list is
      always non-empty, even if no information is available.

      Usually you want to display the source_location and
      object_symbol_with_offset from the last element in the list.
    """
    info = SymbolInformationForSet(lib, {addr})
    return (info and info.get(addr)) or [(None, None, None)]


def SymbolInformationForSet(lib, unique_addrs):
    """Look up symbol information for a set of addresses from the given library.

    Args:
      lib: library (or executable) pathname containing symbols
      unique_addrs: set of hexadecimal addresses

    Returns:
      A dictionary of the form {addr: [(source_symbol, source_location,
      object_symbol_with_offset)]} where each address has a list of
      associated symbols and locations.  The list is always non-empty.

      None is returned instead when lib is empty or when either the
      symbolizer or objdump lookup produced nothing.

      If the function has been inlined then the list may contain
      more than one element with the symbols for the most deeply
      nested inlined location appearing first.  The list is
      always non-empty, even if no information is available.

      Usually you want to display the source_location and
      object_symbol_with_offset from the last element in the list.
    """
    if not lib:
        return None

    addr_to_line = CallLlvmSymbolizerForSet(lib, unique_addrs)
    if not addr_to_line:
        return None

    addr_to_objdump = CallObjdumpForSet(lib, unique_addrs)
    if not addr_to_objdump:
        return None

    result = {}
    for addr in unique_addrs:
        source_info = addr_to_line.get(addr)
        if not source_info:
            source_info = [(None, None)]
        if addr in addr_to_objdump:
            (object_symbol, object_offset) = addr_to_objdump.get(addr)
            object_symbol_with_offset = FormatSymbolWithOffset(object_symbol,
                                                               object_offset)
        else:
            object_symbol_with_offset = None
        result[addr] = [(source_symbol, source_location, object_symbol_with_offset)
                        for (source_symbol, source_location) in source_info]

    return result


def _OptionalStackRecordField(json_result, field):
    """Fix up bizarre formatting of llvm-symbolizer output

    Some parts of the FRAME output are output as a string containing a hex
    integer, or the empty string when it's missing.

    Args:
      json_result: dictionary containing the Frame response
      field: name of the field we want to read

    Returns:
      integer of field value, or None if missing
    """
    value = json_result.get(field, "")
    if isinstance(value, int):
        # Leaving this here in case someone decides to fix the types of the
        # symbolizer output, so it's easier to roll out.
        return value
    if value != "":
        return int(value, 16)
    return None


def _GetJSONSymbolizerForLib(lib, args=None):
    """ Find symbol file for lib, and return a llvm-symbolizer instance for it.

    Args:
      lib: library (or executable) pathname containing symbols
      args: (optional) list of arguments to pass to llvm-symbolizer

    Returns:
      child process, or None if lib not found
    """
    if args is None:
        args = []
    # Prefer the unstripped copy below SYMBOLS_DIR, then the path as given.
    symbols = SYMBOLS_DIR + lib
    if not os.path.exists(symbols):
        symbols = lib
        if not os.path.exists(symbols):
            return None

    # Make sure the symbols path is not a directory.
    if os.path.isdir(symbols):
        return None

    cmd = [ToolPath("llvm-symbolizer"), "--output-style=JSON"] + args + ["--obj=" + symbols]
    return _PIPE_ADDR2LINE_CACHE.GetProcess(cmd)


def GetStackRecordsForSet(lib, unique_addrs):
    """Look up stack record information for a set of addresses

    Args:
      lib: library (or executable) pathname containing symbols
      unique_addrs: set of integer addresses look up.

    Returns:
      A list of tuples
      (addr, function_name, local_name, file_line, frame_offset, size, tag_offset)
      describing the local variables of the stack frame.
      frame_offset, size, tag_offset may be None.
      None is returned if no symbolizer could be started for lib.
    """
    child = _GetJSONSymbolizerForLib(lib)
    if child is None:
        return None
    records = []
    for addr in unique_addrs:
        child.stdin.write("FRAME 0x%x\n" % addr)
        child.stdin.flush()
        json_result = json.loads(child.stdout.readline().strip())
        # A reply without a "Frame" key (e.g. an error reply) yields no records.
        for frame in json_result.get("Frame", []):
            records.append(
                (addr,
                 frame["FunctionName"],
                 frame["Name"],
                 frame["DeclFile"] + ":" + str(frame["DeclLine"]),
                 frame.get("FrameOffset"),
                 _OptionalStackRecordField(frame, "Size"),
                 _OptionalStackRecordField(frame, "TagOffset")))
    return records


def _TakeCachedAddrs(addr_cache, addrs, result):
    """Copy cached entries into result and return the addresses still missing.

    Args:
      addr_cache: dictionary of previously computed entries, keyed by address
      addrs: list of addresses to look up
      result: dictionary that receives the entry of every cached address

    Returns:
      The elements of addrs that are not in addr_cache, in their original order.
    """
    pending = []
    for addr in addrs:
        if addr in addr_cache:
            result[addr] = addr_cache[addr]
        else:
            pending.append(addr)
    return pending


def CallLlvmSymbolizerForSet(lib, unique_addrs):
    """Look up line and symbol information for a set of addresses.

    Args:
      lib: library (or executable) pathname containing symbols
      unique_addrs: set of string hexadecimal addresses look up.

    Returns:
      A dictionary of the form {addr: [(symbol, file:line)]} where
      each address has a list of associated symbols and locations
      or an empty list if no symbol information was found.
      None is returned if lib is empty or no symbolizer could be started.

      If the function has been inlined then the list may contain
      more than one element with the symbols for the most deeply
      nested inlined location appearing first.
    """
    if not lib:
        return None

    result = {}
    addrs = sorted(unique_addrs)

    addr_cache = _SYMBOL_INFORMATION_ADDR2LINE_CACHE.get(lib)
    if addr_cache is not None:
        # Go through and handle all known addresses.
        addrs = _TakeCachedAddrs(addr_cache, addrs, result)
        if not addrs:
            # Everything was cached, we're done.
            return result
    else:
        addr_cache = {}
        _SYMBOL_INFORMATION_ADDR2LINE_CACHE[lib] = addr_cache

    child = _GetJSONSymbolizerForLib(
        lib, ["--functions", "--inlines", "--demangle"])
    if child is None:
        return None
    for addr in addrs:
        try:
            child.stdin.write("0x%s\n" % addr)
            child.stdin.flush()
            json_result = json.loads(child.stdout.readline().strip())
            # GNU style location: file_name:line_num
            # A reply without a "Symbol" key (e.g. an error reply) gives the
            # documented empty list instead of a KeyError.
            records = [(symbol["FunctionName"],
                        "%s:%s" % (symbol["FileName"], symbol["Line"]))
                       for symbol in json_result.get("Symbol", [])]
        except (IOError, ValueError) as e:
            # ValueError covers an empty or malformed JSON reply.
            # Remove the / in front of the library name to match other output.
            records = [(None, lib[1:] + " ***Error: " + str(e))]
        result[addr] = records
        addr_cache[addr] = records
    return result


def CallObjdumpForSet(lib, unique_addrs):
    """Use objdump to find out the names of the containing functions.

    Args:
      lib: library (or executable) pathname containing symbols
      unique_addrs: set of string hexadecimal addresses to find the functions for.

    Returns:
      A dictionary of the form {addr: (string symbol, offset)}, holding only
      the addresses that were found at the start of a disassembled
      instruction. None is returned if lib is empty or its file is not found.
    """
    if not lib:
        return None

    result = {}
    # Sort by numeric value: the disassembly is scanned once in address order,
    # and a plain string sort would put e.g. "fff" after "1000".
    addrs = sorted(unique_addrs, key=lambda hex_addr: int(hex_addr, 16))

    addr_cache = _SYMBOL_INFORMATION_OBJDUMP_CACHE.get(lib)
    if addr_cache is not None:
        # Go through and handle all known addresses.
        addrs = _TakeCachedAddrs(addr_cache, addrs, result)
    else:
        addr_cache = {}
        _SYMBOL_INFORMATION_OBJDUMP_CACHE[lib] = addr_cache

    if not addrs:
        # Everything was cached (or nothing was asked for), we're done.
        return result

    symbols = SYMBOLS_DIR + lib
    if not os.path.exists(symbols):
        symbols = lib
        if not os.path.exists(symbols):
            return None

    targets = [int(hex_addr, 16) for hex_addr in addrs]
    start_addr_dec = str(targets[0])
    stop_addr_dec = str(targets[-1] + 8)
    cmd = [ToolPath("llvm-objdump"),
           "--section=.text",
           "--demangle",
           "--disassemble",
           "--start-address=" + start_addr_dec,
           "--stop-address=" + stop_addr_dec,
           symbols]

    # Function lines look like:
    #   000177b0 <android::IBinder::~IBinder()+0x2c>:
    # We pull out the address and function first. Then we check for an optional
    # offset. This is tricky due to functions that look like "operator+(..)+0x2c"
    func_regexp = re.compile(r"(^[a-f0-9]*) \<(.*)\>:$")
    offset_regexp = re.compile(r"(.*)\+0x([a-f0-9]*)")

    # A disassembly line looks like:
    #   177b2:  b510        push    {r4, lr}
    asm_regexp = re.compile(r"(^[ a-f0-9]*):[ a-f0-9]*.*$")

    current_symbol = None    # The current function symbol in the disassembly.
    current_symbol_addr = 0  # The address of the current function.
    addr_index = 0           # Index of the address that we are currently looking for.

    # The context manager closes the pipe and waits for objdump, also when we
    # stop reading early, so no zombie process is left behind.
    with subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True) as objdump:
        for line in objdump.stdout:
            # Is it a function line like:
            #   000177b0 <android::IBinder::~IBinder()>:
            components = func_regexp.match(line)
            if components:
                # This is a new function, so record the current function and its address.
                current_symbol_addr = int(components.group(1), 16)
                current_symbol = components.group(2)

                # Does it have an optional offset like: "foo(..)+0x2c"?
                components = offset_regexp.match(current_symbol)
                if components:
                    current_symbol = components.group(1)
                    offset = components.group(2)
                    if offset:
                        current_symbol_addr -= int(offset, 16)

            # Is it an disassembly line like:
            #   177b2:  b510        push    {r4, lr}
            components = asm_regexp.match(line)
            if not components:
                continue
            i_addr = int(components.group(1), 16)

            # Consume every target at or below this instruction address. A
            # target the disassembly has already passed can no longer match
            # and must not block the targets behind it.
            while addr_index < len(addrs) and targets[addr_index] <= i_addr:
                if targets[addr_index] == i_addr:
                    target_addr = addrs[addr_index]
                    result[target_addr] = (current_symbol, i_addr - current_symbol_addr)
                    addr_cache[target_addr] = result[target_addr]
                addr_index += 1
            if addr_index >= len(addrs):
                break

    return result


def CallCppFilt(mangled_symbol):
    """Demangle a C++ symbol with llvm-cxxfilt.

    Results are cached in _SYMBOL_DEMANGLING_CACHE, and the location of the
    llvm-cxxfilt binary is cached in _CACHED_CXX_FILT.

    Args:
      mangled_symbol: the symbol to demangle

    Returns:
      The line llvm-cxxfilt printed for the symbol, stripped of whitespace.

    Raises:
      Exception: if llvm-cxxfilt cannot be located unambiguously.
    """
    if mangled_symbol in _SYMBOL_DEMANGLING_CACHE:
        return _SYMBOL_DEMANGLING_CACHE[mangled_symbol]

    global _CACHED_CXX_FILT
    if not _CACHED_CXX_FILT:
        toolchains = None
        clang_dir = FindClangDir()
        if clang_dir:
            if os.path.exists(clang_dir + "/bin/llvm-cxxfilt"):
                toolchains = [clang_dir + "/bin/llvm-cxxfilt"]
            else:
                raise Exception("bin/llvm-cxxfilt missing from " + clang_dir)
        else:
            # When run in CI, we don't have a way to find the clang version.  But
            # llvm-cxxfilt should be available in the following relative path.
            toolchains = glob.glob("./clang-r*/bin/llvm-cxxfilt")
            if toolchains and len(toolchains) != 1:
                raise Exception("Expected one llvm-cxxfilt but found many: " +
                                ", ".join(toolchains))
        if not toolchains:
            raise Exception("Could not find llvm-cxxfilt tool")
        _CACHED_CXX_FILT = sorted(toolchains)[-1]

    cmd = [_CACHED_CXX_FILT]
    process = _PIPE_CPPFILT_CACHE.GetProcess(cmd)
    process.stdin.write(mangled_symbol)
    process.stdin.write("\n")
    process.stdin.flush()

    demangled_symbol = process.stdout.readline().strip()

    _SYMBOL_DEMANGLING_CACHE[mangled_symbol] = demangled_symbol

    return demangled_symbol


def FormatSymbolWithOffset(symbol, offset):
    """Return "symbol+offset" (offset in decimal), or just symbol if offset is 0."""
    if offset == 0:
        return symbol
    return "%s+%d" % (symbol, offset)


def FormatSymbolWithoutParameters(symbol):
    """Remove parameters from function.

    Rather than trying to parse the demangled C++ signature,
    it just removes matching top level parenthesis.

    Args:
      symbol: demangled symbol, possibly followed by an offset such as "+42"

    Returns:
      The symbol without its top-level parameter lists, or the symbol
      unchanged if it is empty or its brackets are not balanced.
    """
    if not symbol:
        return symbol

    result = symbol
    result = result.replace(") const", ")")                  # Strip const keyword.
    result = result.replace("operator<<", "operator\u00AB")  # Avoid unmatched '<'.
    result = result.replace("operator>>", "operator\u00BB")  # Avoid unmatched '>'.
    result = result.replace("operator->", "operator\u2192")  # Avoid unmatched '>'.

    nested = []  # Keeps track of current nesting level of parenthesis.
    for i in reversed(range(len(result))):  # Iterate backward to make cutting easier.
        c = result[i]
        if c == ')' or c == '>':
            if len(nested) == 0:
                end = i + 1  # Mark the end of top-level pair.
            nested.append(c)
        if c == '(' or c == '<':
            if len(nested) == 0 or {')':'(', '>':'<'}[nested.pop()] != c:
                return symbol  # Malformed: character does not match its pair.
            # Pairs of length two, i.e. "()", are kept (as in "operator()").
            if len(nested) == 0 and c == '(' and (end - i) > 2:
                result = result[:i] + result[end:]  # Remove substring (i, end).
    if len(nested) > 0:
        return symbol  # Malformed: missing pair.

    return result.strip()


def SetBitness(lines):
    """Guess from stack trace lines whether the trace is 32 bit or 64 bit.

    The guess is stored in the module-level ARCH_IS_32BIT, which is False when
    no line gives a hint.

    Args:
      lines: iterable of stack trace lines
    """
    global ARCH_IS_32BIT

    trace_line = re.compile(r"\#[0-9]+[ \t]+..[ \t]+([0-9a-f]{8}|[0-9a-f]{16})([ \t]+|$)")
    asan_trace_line = re.compile(r"\#[0-9]+[ \t]+0x([0-9a-f]+)[ \t]+")

    ARCH_IS_32BIT = False
    for line in lines:
        trace_match = trace_line.search(line)
        if trace_match:
            # Try to guess the arch, we know the bitness.
            if len(trace_match.group(1)) == 16:
                ARCH_IS_32BIT = False
            else:
                ARCH_IS_32BIT = True
            break
        asan_trace_match = asan_trace_line.search(line)
        if asan_trace_match:
            # We might be able to guess the bitness by the length of the address.
            if len(asan_trace_match.group(1)) > 8:
                ARCH_IS_32BIT = False
                # We know for a fact this is 64 bit, so we are done.
                break
            else:
                # This might be 32 bit, or just a small address. Keep going in this
                # case, but if we couldn't figure anything else out, go with 32 bit.
                ARCH_IS_32BIT = True


class FindClangDirTests(unittest.TestCase):
    @unittest.skipIf(ANDROID_BUILD_TOP == '.', 'Test only supported in an Android tree.')
    def test_clang_dir_found(self):
        self.assertIsNotNone(FindClangDir())


class SetBitnessTests(unittest.TestCase):
    def test_32bit_check(self):
        global ARCH_IS_32BIT

        SetBitness(["#00 pc 000374e0"])
        self.assertTrue(ARCH_IS_32BIT)

    def test_64bit_check(self):
        global ARCH_IS_32BIT

        SetBitness(["#00 pc 00000000000374e0"])
        self.assertFalse(ARCH_IS_32BIT)

    def test_32bit_asan_trace_line_toolchain(self):
        global ARCH_IS_32BIT

        SetBitness(["#10 0xb5eeba5d  (/system/vendor/lib/egl/libGLESv1_CM_adreno.so+0xfa5d)"])
        self.assertTrue(ARCH_IS_32BIT)

    def test_64bit_asan_trace_line_toolchain(self):
        global ARCH_IS_32BIT

        SetBitness(["#12 0x5d33bf  (/system/lib/libclang_rt.asan-arm-android.so+0x823bf)",
                    "#12 0x11b35d33bf  (/system/lib/libclang_rt.asan-arm-android.so+0x823bf)"])
        self.assertFalse(ARCH_IS_32BIT)


class FormatSymbolWithoutParametersTests(unittest.TestCase):
    def test_c(self):
        self.assertEqual(FormatSymbolWithoutParameters("foo"), "foo")
        self.assertEqual(FormatSymbolWithoutParameters("foo+42"), "foo+42")

    def test_simple(self):
        self.assertEqual(FormatSymbolWithoutParameters("foo(int i)"), "foo")
        self.assertEqual(FormatSymbolWithoutParameters("foo(int i)+42"), "foo+42")
        self.assertEqual(FormatSymbolWithoutParameters("bar::foo(int i)+42"), "bar::foo+42")
        self.assertEqual(FormatSymbolWithoutParameters("operator()"), "operator()")

    def test_templates(self):
        self.assertEqual(FormatSymbolWithoutParameters("bar::foo<T>(vector<T>& v)"), "bar::foo<T>")
        self.assertEqual(FormatSymbolWithoutParameters("bar<T>::foo(vector<T>& v)"), "bar<T>::foo")
        self.assertEqual(FormatSymbolWithoutParameters("bar::foo<T>(vector<T<U>>& v)"), "bar::foo<T>")
        self.assertEqual(FormatSymbolWithoutParameters("bar::foo<(EnumType)0>(vector<(EnumType)0>& v)"),
                         "bar::foo<(EnumType)0>")

    def test_nested(self):
        self.assertEqual(FormatSymbolWithoutParameters("foo(int i)::bar(int j)"), "foo::bar")

    def test_unbalanced(self):
        self.assertEqual(FormatSymbolWithoutParameters("foo(bar(int i)"), "foo(bar(int i)")
        self.assertEqual(FormatSymbolWithoutParameters("foo)bar(int i)"), "foo)bar(int i)")
        self.assertEqual(FormatSymbolWithoutParameters("foo<bar(int i)"), "foo<bar(int i)")
        self.assertEqual(FormatSymbolWithoutParameters("foo>bar(int i)"), "foo>bar(int i)")


if __name__ == '__main__':
    unittest.main(verbosity=2)