1#!/usr/bin/env python3 2# 3# Copyright (C) 2016 The Android Open Source Project 4# 5# Licensed under the Apache License, Version 2.0 (the "License"); 6# you may not use this file except in compliance with the License. 7# You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, software 12# distributed under the License is distributed on an "AS IS" BASIS, 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14# See the License for the specific language governing permissions and 15# limitations under the License. 16# 17 18"""utils.py: export utility functions. 19""" 20 21from __future__ import annotations 22import argparse 23from concurrent.futures import Future, ThreadPoolExecutor 24from dataclasses import dataclass 25import logging 26import os 27import os.path 28from pathlib import Path 29import re 30import shutil 31import subprocess 32import sys 33import time 34from typing import Any, Dict, Iterator, List, Optional, Set, Tuple, Union, TextIO 35 36 37NDK_ERROR_MESSAGE = "Please install the Android NDK (https://developer.android.com/studio/projects/install-ndk), then set NDK path with --ndk_path option." 38 39 40def get_script_dir() -> str: 41 return os.path.dirname(os.path.realpath(__file__)) 42 43 44def is_windows() -> bool: 45 return sys.platform == 'win32' or sys.platform == 'cygwin' 46 47 48def is_darwin() -> bool: 49 return sys.platform == 'darwin' 50 51 52def get_platform() -> str: 53 if is_windows(): 54 return 'windows' 55 if is_darwin(): 56 return 'darwin' 57 return 'linux' 58 59 60def str_to_bytes(str_value: str) -> bytes: 61 # In python 3, str are wide strings whereas the C api expects 8 bit strings, 62 # hence we have to convert. For now using utf-8 as the encoding. 63 return str_value.encode('utf-8') 64 65 66def bytes_to_str(bytes_value: Optional[bytes]) -> str: 67 if not bytes_value: 68 return '' 69 return bytes_value.decode('utf-8') 70 71 72def get_target_binary_path(arch: str, binary_name: str) -> str: 73 if arch == 'aarch64': 74 arch = 'arm64' 75 arch_dir = os.path.join(get_script_dir(), "bin", "android", arch) 76 if not os.path.isdir(arch_dir): 77 log_fatal("can't find arch directory: %s" % arch_dir) 78 binary_path = os.path.join(arch_dir, binary_name) 79 if not os.path.isfile(binary_path): 80 log_fatal("can't find binary: %s" % binary_path) 81 return binary_path 82 83 84def get_host_binary_path(binary_name: str) -> str: 85 dirname = os.path.join(get_script_dir(), 'bin') 86 if is_windows(): 87 if binary_name.endswith('.so'): 88 binary_name = binary_name[0:-3] + '.dll' 89 elif '.' not in binary_name: 90 binary_name += '.exe' 91 dirname = os.path.join(dirname, 'windows') 92 elif sys.platform == 'darwin': # OSX 93 if binary_name.endswith('.so'): 94 binary_name = binary_name[0:-3] + '.dylib' 95 dirname = os.path.join(dirname, 'darwin') 96 else: 97 dirname = os.path.join(dirname, 'linux') 98 dirname = os.path.join(dirname, 'x86_64' if sys.maxsize > 2 ** 32 else 'x86') 99 binary_path = os.path.join(dirname, binary_name) 100 if not os.path.isfile(binary_path): 101 log_fatal("can't find binary: %s" % binary_path) 102 return binary_path 103 104 105def is_executable_available(executable: str, option='--help') -> bool: 106 """ Run an executable to see if it exists. """ 107 try: 108 subproc = subprocess.Popen([executable, option], stdout=subprocess.PIPE, 109 stderr=subprocess.PIPE) 110 subproc.communicate() 111 return subproc.returncode == 0 112 except OSError: 113 return False 114 115 116class ToolFinder: 117 """ Find tools in ndk or sdk. """ 118 DEFAULT_SDK_PATH = { 119 'darwin': 'Library/Android/sdk', 120 'linux': 'Android/Sdk', 121 'windows': 'AppData/Local/Android/sdk', 122 } 123 124 EXPECTED_TOOLS = { 125 'adb': { 126 'is_binutils': False, 127 'test_option': 'version', 128 'path_in_sdk': 'platform-tools/adb', 129 }, 130 'llvm-objdump': { 131 'is_binutils': False, 132 'path_in_ndk': 133 lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-objdump' % platform, 134 }, 135 'llvm-readelf': { 136 'is_binutils': False, 137 'path_in_ndk': 138 lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-readelf' % platform, 139 }, 140 'llvm-symbolizer': { 141 'is_binutils': False, 142 'path_in_ndk': 143 lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-symbolizer' % platform, 144 }, 145 'llvm-strip': { 146 'is_binutils': False, 147 'path_in_ndk': 148 lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-strip' % platform, 149 }, 150 } 151 152 @classmethod 153 def find_ndk_and_sdk_paths(cls, ndk_path: Optional[str] = None 154 ) -> Iterator[Tuple[Optional[str], Optional[str]]]: 155 # Use the given ndk path. 156 if ndk_path and os.path.isdir(ndk_path): 157 ndk_path = os.path.abspath(ndk_path) 158 yield ndk_path, cls.find_sdk_path(ndk_path) 159 # Find ndk in the parent directory containing simpleperf scripts. 160 ndk_path = os.path.dirname(os.path.abspath(get_script_dir())) 161 yield ndk_path, cls.find_sdk_path(ndk_path) 162 # Find ndk in the default sdk installation path. 163 if is_windows(): 164 home = os.environ.get('HOMEDRIVE') + os.environ.get('HOMEPATH') 165 else: 166 home = os.environ.get('HOME') 167 if home: 168 platform = get_platform() 169 sdk_path = os.path.join(home, cls.DEFAULT_SDK_PATH[platform].replace('/', os.sep)) 170 if os.path.isdir(sdk_path): 171 path = os.path.join(sdk_path, 'ndk') 172 if os.path.isdir(path): 173 # Android Studio can install multiple ndk versions in 'ndk'. 174 # Find the newest one. 175 ndk_version = None 176 for name in os.listdir(path): 177 if not ndk_version or ndk_version < name: 178 ndk_version = name 179 if ndk_version: 180 yield os.path.join(path, ndk_version), sdk_path 181 ndk_path = os.path.join(sdk_path, 'ndk-bundle') 182 if os.path.isdir(ndk_path): 183 yield ndk_path, sdk_path 184 185 @classmethod 186 def find_sdk_path(cls, ndk_path: str) -> Optional[str]: 187 path = ndk_path 188 for _ in range(2): 189 path = os.path.dirname(path) 190 if os.path.isdir(os.path.join(path, 'platform-tools')): 191 return path 192 return None 193 194 @classmethod 195 def _get_binutils_path_in_ndk(cls, toolname: str, arch: Optional[str], platform: str 196 ) -> Tuple[str, str]: 197 if not arch: 198 arch = 'arm64' 199 if arch == 'arm64': 200 name = 'aarch64-linux-android-' + toolname 201 elif arch == 'arm': 202 name = 'arm-linux-androideabi-' + toolname 203 elif arch == 'x86_64': 204 name = 'x86_64-linux-android-' + toolname 205 elif arch == 'x86': 206 name = 'i686-linux-android-' + toolname 207 else: 208 log_fatal('unexpected arch %s' % arch) 209 path = 'toolchains/llvm/prebuilt/%s-x86_64/bin/%s' % (platform, name) 210 return (name, path) 211 212 @classmethod 213 def find_tool_path(cls, toolname: str, ndk_path: Optional[str] = None, 214 arch: Optional[str] = None) -> Optional[str]: 215 tool_info = cls.EXPECTED_TOOLS.get(toolname) 216 if not tool_info: 217 return None 218 219 is_binutils = tool_info['is_binutils'] 220 test_option = tool_info.get('test_option', '--help') 221 platform = get_platform() 222 223 # Find tool in clang prebuilts in Android platform. 224 if toolname.startswith('llvm-') and platform == 'linux' and get_script_dir().endswith( 225 'system/extras/simpleperf/scripts'): 226 path = str( 227 Path(get_script_dir()).parents[3] / 'prebuilts' / 'clang' / 'host' / 'linux-x86' / 228 'llvm-binutils-stable' / toolname) 229 if is_executable_available(path, test_option): 230 return path 231 232 # Find tool in NDK or SDK. 233 path_in_ndk = None 234 path_in_sdk = None 235 if is_binutils: 236 toolname_with_arch, path_in_ndk = cls._get_binutils_path_in_ndk( 237 toolname, arch, platform) 238 else: 239 toolname_with_arch = toolname 240 if 'path_in_ndk' in tool_info: 241 path_in_ndk = tool_info['path_in_ndk'](platform) 242 elif 'path_in_sdk' in tool_info: 243 path_in_sdk = tool_info['path_in_sdk'] 244 if path_in_ndk: 245 path_in_ndk = path_in_ndk.replace('/', os.sep) 246 elif path_in_sdk: 247 path_in_sdk = path_in_sdk.replace('/', os.sep) 248 249 for ndk_dir, sdk_dir in cls.find_ndk_and_sdk_paths(ndk_path): 250 if path_in_ndk and ndk_dir: 251 path = os.path.join(ndk_dir, path_in_ndk) 252 if is_executable_available(path, test_option): 253 return path 254 elif path_in_sdk and sdk_dir: 255 path = os.path.join(sdk_dir, path_in_sdk) 256 if is_executable_available(path, test_option): 257 return path 258 259 # Find tool in $PATH. 260 if is_executable_available(toolname_with_arch, test_option): 261 return toolname_with_arch 262 263 # Find tool without arch in $PATH. 264 if is_binutils and tool_info.get('accept_tool_without_arch'): 265 if is_executable_available(toolname, test_option): 266 return toolname 267 return None 268 269 270class AdbHelper(object): 271 def __init__(self, enable_switch_to_root: bool = True): 272 adb_path = ToolFinder.find_tool_path('adb') 273 if not adb_path: 274 log_exit("Can't find adb in PATH environment.") 275 self.adb_path: str = adb_path 276 self.enable_switch_to_root = enable_switch_to_root 277 self.serial_number: Optional[str] = None 278 279 def is_device_available(self) -> bool: 280 return self.run_and_return_output(['shell', 'whoami'])[0] 281 282 def run(self, adb_args: List[str], log_output: bool = False, log_stderr: bool = False) -> bool: 283 return self.run_and_return_output(adb_args, log_output, log_stderr)[0] 284 285 def run_and_return_output(self, adb_args: List[str], log_output: bool = False, 286 log_stderr: bool = False) -> Tuple[bool, str]: 287 adb_args = [self.adb_path] + adb_args 288 logging.debug('run adb cmd: %s' % adb_args) 289 env = None 290 if self.serial_number: 291 env = os.environ.copy() 292 env['ANDROID_SERIAL'] = self.serial_number 293 subproc = subprocess.Popen( 294 adb_args, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 295 stdout_data, stderr_data = subproc.communicate() 296 stdout_data = bytes_to_str(stdout_data) 297 stderr_data = bytes_to_str(stderr_data) 298 returncode = subproc.returncode 299 result = (returncode == 0) 300 if log_output and stdout_data: 301 logging.debug(stdout_data) 302 if log_stderr and stderr_data: 303 logging.warning(stderr_data) 304 logging.debug('run adb cmd: %s [result %s]' % (adb_args, result)) 305 return (result, stdout_data) 306 307 def check_run(self, adb_args: List[str], log_output: bool = False): 308 self.check_run_and_return_output(adb_args, log_output) 309 310 def check_run_and_return_output(self, adb_args: List[str], log_output: bool = False, 311 log_stderr: bool = False) -> str: 312 result, stdoutdata = self.run_and_return_output(adb_args, log_output, True) 313 if not result: 314 log_exit('run "adb %s" failed: %s' % (adb_args, stdoutdata)) 315 return stdoutdata 316 317 def _unroot(self): 318 result, stdoutdata = self.run_and_return_output(['shell', 'whoami']) 319 if not result: 320 return 321 if 'root' not in stdoutdata: 322 return 323 logging.info('unroot adb') 324 self.run(['unroot']) 325 time.sleep(1) 326 self.run(['wait-for-device']) 327 328 def switch_to_root(self) -> bool: 329 if not self.enable_switch_to_root: 330 self._unroot() 331 return False 332 result, stdoutdata = self.run_and_return_output(['shell', 'whoami']) 333 if not result: 334 return False 335 if 'root' in stdoutdata: 336 return True 337 build_type = self.get_property('ro.build.type') 338 if build_type == 'user': 339 return False 340 self.run(['root']) 341 time.sleep(1) 342 self.run(['wait-for-device']) 343 result, stdoutdata = self.run_and_return_output(['shell', 'whoami']) 344 return result and 'root' in stdoutdata 345 346 def get_property(self, name: str) -> Optional[str]: 347 result, stdoutdata = self.run_and_return_output(['shell', 'getprop', name]) 348 return stdoutdata.strip() if result else None 349 350 def set_property(self, name: str, value: str) -> bool: 351 return self.run(['shell', 'setprop', name, value]) 352 353 def get_device_arch(self) -> str: 354 output = self.check_run_and_return_output(['shell', 'uname', '-m']) 355 if 'aarch64' in output: 356 return 'arm64' 357 if 'arm' in output: 358 return 'arm' 359 if 'x86_64' in output: 360 return 'x86_64' 361 if '86' in output: 362 return 'x86' 363 if 'riscv64' in output: 364 return 'riscv64' 365 log_fatal('unsupported architecture: %s' % output.strip()) 366 return '' 367 368 def get_android_version(self) -> int: 369 """ Get Android version on device, like 7 is for Android N, 8 is for Android O.""" 370 def parse_version(s: str) -> int: 371 if not s: 372 return 0 373 if s[0].isdigit(): 374 i = 1 375 while i < len(s) and s[i].isdigit(): 376 i += 1 377 return int(s[:i]) 378 else: 379 c = s[0].upper() 380 if c.isupper() and 'L' <= c <= 'V': 381 return ord(c) - ord('L') + 5 382 return 0 383 384 android_version = 0 385 s = self.get_property('ro.build.version.codename') 386 if s != 'REL': 387 android_version = parse_version(s) 388 if android_version == 0: 389 s = self.get_property('ro.build.version.release') 390 android_version = parse_version(s) 391 if android_version == 0: 392 s = self.get_property('ro.build.version.sdk') 393 if int(s) >= 35: 394 android_version = 15 395 return android_version 396 397 398def flatten_arg_list(arg_list: List[List[str]]) -> List[str]: 399 res = [] 400 if arg_list: 401 for items in arg_list: 402 res += items 403 return res 404 405 406def remove(dir_or_file: Union[Path, str]): 407 if os.path.isfile(dir_or_file): 408 os.remove(dir_or_file) 409 elif os.path.isdir(dir_or_file): 410 shutil.rmtree(dir_or_file, ignore_errors=True) 411 412 413def open_report_in_browser(report_path: str): 414 if is_darwin(): 415 # On darwin 10.12.6, webbrowser can't open browser, so try `open` cmd first. 416 try: 417 subprocess.check_call(['open', report_path]) 418 return 419 except subprocess.CalledProcessError: 420 pass 421 import webbrowser 422 try: 423 # Try to open the report with Chrome 424 browser = webbrowser.get('google-chrome') 425 browser.open(report_path, new=0, autoraise=True) 426 except webbrowser.Error: 427 # webbrowser.get() doesn't work well on darwin/windows. 428 webbrowser.open_new_tab(report_path) 429 430 431class BinaryFinder: 432 def __init__(self, binary_cache_dir: Optional[Union[Path, str]], readelf: ReadElf): 433 if isinstance(binary_cache_dir, str): 434 binary_cache_dir = Path(binary_cache_dir) 435 self.binary_cache_dir = binary_cache_dir 436 self.readelf = readelf 437 self.build_id_map = self._load_build_id_map() 438 439 def _load_build_id_map(self) -> Dict[str, Path]: 440 build_id_map: Dict[str, Path] = {} 441 if self.binary_cache_dir: 442 build_id_list_file = self.binary_cache_dir / 'build_id_list' 443 if build_id_list_file.is_file(): 444 with open(self.binary_cache_dir / 'build_id_list', 'rb') as fh: 445 for line in fh.readlines(): 446 # lines are in format "<build_id>=<path_in_binary_cache>". 447 items = bytes_to_str(line).strip().split('=') 448 if len(items) == 2: 449 build_id_map[items[0]] = self.binary_cache_dir / items[1] 450 return build_id_map 451 452 def find_binary(self, dso_path_in_record_file: str, 453 expected_build_id: Optional[str]) -> Optional[Path]: 454 """ If expected_build_id is None, don't check build id. 455 Otherwise, the build id of the found binary should match the expected one.""" 456 # Find binary from build id map. 457 if expected_build_id: 458 path = self.build_id_map.get(expected_build_id) 459 if path and self._check_path(path, expected_build_id): 460 return path 461 # Find binary by path in binary cache. 462 if self.binary_cache_dir: 463 path = self.binary_cache_dir / dso_path_in_record_file[1:].replace('/', os.sep) 464 if self._check_path(path, expected_build_id): 465 return path 466 # Find binary by its absolute path. 467 path = Path(dso_path_in_record_file) 468 if self._check_path(path, expected_build_id): 469 return path 470 return None 471 472 def _check_path(self, path: Path, expected_build_id: Optional[str]) -> bool: 473 if not self.readelf.is_elf_file(path): 474 return False 475 if expected_build_id is not None: 476 return self.readelf.get_build_id(path) == expected_build_id 477 return True 478 479 480class Addr2Nearestline(object): 481 """ Use llvm-symbolizer to convert (dso_path, func_addr, addr) to (source_file, line). 482 For instructions generated by C++ compilers without a matching statement in source code 483 (like stack corruption check, switch optimization, etc.), addr2line can't generate 484 line information. However, we want to assign the instruction to the nearest line before 485 the instruction (just like objdump -dl). So we use below strategy: 486 Instead of finding the exact line of the instruction in an address, we find the nearest 487 line to the instruction in an address. If an address doesn't have a line info, we find 488 the line info of address - 1. If still no line info, then use address - 2, address - 3, 489 etc. 490 491 The implementation steps are as below: 492 1. Collect all (dso_path, func_addr, addr) requests before converting. This saves the 493 times to call addr2line. 494 2. Convert addrs to (source_file, line) pairs for each dso_path as below: 495 2.1 Check if the dso_path has .debug_line. If not, omit its conversion. 496 2.2 Get arch of the dso_path, and decide the addr_step for it. addr_step is the step we 497 change addr each time. For example, since instructions of arm64 are all 4 bytes long, 498 addr_step for arm64 can be 4. 499 2.3 Use addr2line to find line info for each addr in the dso_path. 500 2.4 For each addr without line info, use addr2line to find line info for 501 range(addr - addr_step, addr - addr_step * 4 - 1, -addr_step). 502 2.5 For each addr without line info, use addr2line to find line info for 503 range(addr - addr_step * 5, addr - addr_step * 128 - 1, -addr_step). 504 (128 is a guess number. A nested switch statement in 505 system/core/demangle/Demangler.cpp has >300 bytes without line info in arm64.) 506 """ 507 class Dso(object): 508 """ Info of a dynamic shared library. 509 addrs: a map from address to Addr object in this dso. 510 """ 511 512 def __init__(self, build_id: Optional[str]): 513 self.build_id = build_id 514 self.addrs: Dict[int, Addr2Nearestline.Addr] = {} 515 # Saving file names for each addr takes a lot of memory. So we store file ids in Addr, 516 # and provide data structures connecting file id and file name here. 517 self.file_name_to_id: Dict[str, int] = {} 518 self.file_id_to_name: List[str] = [] 519 self.func_name_to_id: Dict[str, int] = {} 520 self.func_id_to_name: List[str] = [] 521 522 def get_file_id(self, file_path: str) -> int: 523 file_id = self.file_name_to_id.get(file_path) 524 if file_id is None: 525 file_id = self.file_name_to_id[file_path] = len(self.file_id_to_name) 526 self.file_id_to_name.append(file_path) 527 return file_id 528 529 def get_func_id(self, func_name: str) -> int: 530 func_id = self.func_name_to_id.get(func_name) 531 if func_id is None: 532 func_id = self.func_name_to_id[func_name] = len(self.func_id_to_name) 533 self.func_id_to_name.append(func_name) 534 return func_id 535 536 class Addr(object): 537 """ Info of an addr request. 538 func_addr: start_addr of the function containing addr. 539 source_lines: a list of [file_id, line_number] for addr. 540 source_lines[:-1] are all for inlined functions. 541 """ 542 543 def __init__(self, func_addr: int): 544 self.func_addr = func_addr 545 self.source_lines: Optional[List[int, int]] = None 546 547 def __init__( 548 self, ndk_path: Optional[str], 549 binary_finder: BinaryFinder, with_function_name: bool): 550 self.symbolizer_path = ToolFinder.find_tool_path('llvm-symbolizer', ndk_path) 551 if not self.symbolizer_path: 552 log_exit("Can't find llvm-symbolizer. " + NDK_ERROR_MESSAGE) 553 self.readelf = ReadElf(ndk_path) 554 self.dso_map: Dict[str, Addr2Nearestline.Dso] = {} # map from dso_path to Dso. 555 self.binary_finder = binary_finder 556 self.with_function_name = with_function_name 557 558 def add_addr(self, dso_path: str, build_id: Optional[str], func_addr: int, addr: int): 559 dso = self.dso_map.get(dso_path) 560 if dso is None: 561 dso = self.dso_map[dso_path] = self.Dso(build_id) 562 if addr not in dso.addrs: 563 dso.addrs[addr] = self.Addr(func_addr) 564 565 def convert_addrs_to_lines(self, jobs: int): 566 with ThreadPoolExecutor(jobs) as executor: 567 futures: List[Future] = [] 568 for dso_path, dso in self.dso_map.items(): 569 futures.append(executor.submit(self._convert_addrs_in_one_dso, dso_path, dso)) 570 for future in futures: 571 # Call future.result() to report exceptions raised in the executor. 572 future.result() 573 574 def _convert_addrs_in_one_dso(self, dso_path: str, dso: Addr2Nearestline.Dso): 575 real_path = self.binary_finder.find_binary(dso_path, dso.build_id) 576 if not real_path: 577 if dso_path not in ['//anon', 'unknown', '[kernel.kallsyms]']: 578 logging.debug("Can't find dso %s" % dso_path) 579 return 580 581 if not self._check_debug_line_section(real_path): 582 logging.debug("file %s doesn't contain .debug_line section." % real_path) 583 return 584 585 addr_step = self._get_addr_step(real_path) 586 self._collect_line_info(dso, real_path, [0]) 587 self._collect_line_info(dso, real_path, range(-addr_step, -addr_step * 4 - 1, -addr_step)) 588 self._collect_line_info(dso, real_path, 589 range(-addr_step * 5, -addr_step * 128 - 1, -addr_step)) 590 591 def _check_debug_line_section(self, real_path: Path) -> bool: 592 return '.debug_line' in self.readelf.get_sections(real_path) 593 594 def _get_addr_step(self, real_path: Path) -> int: 595 arch = self.readelf.get_arch(real_path) 596 if arch == 'arm64': 597 return 4 598 if arch == 'arm': 599 return 2 600 return 1 601 602 def _collect_line_info( 603 self, dso: Addr2Nearestline.Dso, real_path: Path, addr_shifts: List[int]): 604 """ Use addr2line to get line info in a dso, with given addr shifts. """ 605 # 1. Collect addrs to send to addr2line. 606 addr_set: Set[int] = set() 607 for addr in dso.addrs: 608 addr_obj = dso.addrs[addr] 609 if addr_obj.source_lines: # already has source line, no need to search. 610 continue 611 for shift in addr_shifts: 612 # The addr after shift shouldn't change to another function. 613 shifted_addr = max(addr + shift, addr_obj.func_addr) 614 addr_set.add(shifted_addr) 615 if shifted_addr == addr_obj.func_addr: 616 break 617 if not addr_set: 618 return 619 addr_request = '\n'.join(['0x%x' % addr for addr in sorted(addr_set)]) 620 621 # 2. Use addr2line to collect line info. 622 try: 623 subproc = subprocess.Popen(self._build_symbolizer_args(real_path), 624 stdin=subprocess.PIPE, stdout=subprocess.PIPE) 625 (stdoutdata, _) = subproc.communicate(str_to_bytes(addr_request)) 626 stdoutdata = bytes_to_str(stdoutdata) 627 except OSError: 628 return 629 addr_map = self.parse_line_output(stdoutdata, dso) 630 631 # 3. Fill line info in dso.addrs. 632 for addr in dso.addrs: 633 addr_obj = dso.addrs[addr] 634 if addr_obj.source_lines: 635 continue 636 for shift in addr_shifts: 637 shifted_addr = max(addr + shift, addr_obj.func_addr) 638 lines = addr_map.get(shifted_addr) 639 if lines: 640 addr_obj.source_lines = lines 641 break 642 if shifted_addr == addr_obj.func_addr: 643 break 644 645 def _build_symbolizer_args(self, binary_path: Path) -> List[str]: 646 args = [self.symbolizer_path, '--print-address', '--inlining', '--obj=%s' % binary_path] 647 if self.with_function_name: 648 args += ['--functions=linkage', '--demangle'] 649 else: 650 args.append('--functions=none') 651 return args 652 653 def parse_line_output(self, output: str, dso: Addr2Nearestline.Dso) -> Dict[int, 654 List[Tuple[int]]]: 655 """ 656 The output is a list of lines. 657 address1 658 function_name1 (the function name can be empty) 659 source_location1 660 function_name2 661 source_location2 662 ... 663 (end with empty line) 664 """ 665 666 addr_map: Dict[int, List[Tuple[int]]] = {} 667 lines = output.strip().splitlines() 668 i = 0 669 while i < len(lines): 670 address = self._parse_line_output_address(lines[i]) 671 i += 1 672 if address is None: 673 continue 674 info = [] 675 while i < len(lines): 676 if self.with_function_name: 677 if i + 1 == len(lines): 678 break 679 function_name = lines[i].strip() 680 if not function_name and (':' not in lines[i+1]): 681 # no more frames 682 break 683 i += 1 684 elif not lines[i]: 685 i += 1 686 break 687 688 file_path, line_number = self._parse_line_output_source_location(lines[i]) 689 i += 1 690 if not file_path or not line_number: 691 # An addr can have a list of (file, line), when the addr belongs to an inlined 692 # function. Sometimes only part of the list has ? mark. In this case, we think 693 # the line info is valid if the first line doesn't have ? mark. 694 if not info: 695 break 696 continue 697 file_id = dso.get_file_id(file_path) 698 if self.with_function_name: 699 func_id = dso.get_func_id(function_name) 700 info.append((file_id, line_number, func_id)) 701 else: 702 info.append((file_id, line_number)) 703 if info: 704 addr_map[address] = info 705 return addr_map 706 707 def _parse_line_output_address(self, output: str) -> Optional[int]: 708 if output.startswith('0x'): 709 return int(output, 16) 710 return None 711 712 def _parse_line_output_source_location(self, line: str) -> Tuple[Optional[str], Optional[int]]: 713 file_path, line_number = None, None 714 # Handle lines in format filename:line:column, like "runtest/two_functions.cpp:14:25". 715 # Filename may contain ':' like "C:\Users\...\file". 716 items = line.rsplit(':', 2) 717 if len(items) == 3: 718 file_path, line_number = items[:2] 719 if not file_path or ('?' in file_path) or not line_number or ('?' in line_number): 720 return None, None 721 try: 722 line_number = int(line_number) 723 except ValueError: 724 return None, None 725 return file_path, line_number 726 727 def get_dso(self, dso_path: str) -> Addr2Nearestline.Dso: 728 return self.dso_map.get(dso_path) 729 730 def get_addr_source(self, dso: Addr2Nearestline.Dso, addr: int) -> Optional[List[Tuple[int]]]: 731 source = dso.addrs[addr].source_lines 732 if source is None: 733 return None 734 if self.with_function_name: 735 return [(dso.file_id_to_name[file_id], line, dso.func_id_to_name[func_id]) 736 for (file_id, line, func_id) in source] 737 return [(dso.file_id_to_name[file_id], line) for (file_id, line) in source] 738 739 740class SourceFileSearcher(object): 741 """ Find source file paths in the file system. 742 The file paths reported by addr2line are the paths stored in debug sections 743 of shared libraries. And we need to convert them to file paths in the file 744 system. It is done in below steps: 745 1. Collect all file paths under the provided source_dirs. The suffix of a 746 source file should contain one of below: 747 h: for C/C++ header files. 748 c: for C/C++ source files. 749 java: for Java source files. 750 kt: for Kotlin source files. 751 2. Given an abstract_path reported by addr2line, select the best real path 752 as below: 753 2.1 Find all real paths with the same file name as the abstract path. 754 2.2 Select the real path having the longest common suffix with the abstract path. 755 """ 756 757 SOURCE_FILE_EXTS = {'.h', '.hh', '.H', '.hxx', '.hpp', '.h++', 758 '.c', '.cc', '.C', '.cxx', '.cpp', '.c++', 759 '.java', '.kt'} 760 761 @classmethod 762 def is_source_filename(cls, filename: str) -> bool: 763 ext = os.path.splitext(filename)[1] 764 return ext in cls.SOURCE_FILE_EXTS 765 766 def __init__(self, source_dirs: List[str]): 767 # Map from filename to a list of reversed directory path containing filename. 768 self.filename_to_rparents: Dict[str, List[str]] = {} 769 self._collect_paths(source_dirs) 770 771 def _collect_paths(self, source_dirs: List[str]): 772 for source_dir in source_dirs: 773 for parent, _, file_names in os.walk(source_dir): 774 rparent = None 775 for file_name in file_names: 776 if self.is_source_filename(file_name): 777 rparents = self.filename_to_rparents.get(file_name) 778 if rparents is None: 779 rparents = self.filename_to_rparents[file_name] = [] 780 if rparent is None: 781 rparent = parent[::-1] 782 rparents.append(rparent) 783 784 def get_real_path(self, abstract_path: str) -> Optional[str]: 785 abstract_path = abstract_path.replace('/', os.sep) 786 abstract_parent, file_name = os.path.split(abstract_path) 787 abstract_rparent = abstract_parent[::-1] 788 real_rparents = self.filename_to_rparents.get(file_name) 789 if real_rparents is None: 790 return None 791 best_matched_rparent = None 792 best_common_length = -1 793 for real_rparent in real_rparents: 794 length = len(os.path.commonprefix((real_rparent, abstract_rparent))) 795 if length > best_common_length: 796 best_common_length = length 797 best_matched_rparent = real_rparent 798 if best_matched_rparent is None: 799 return None 800 return os.path.join(best_matched_rparent[::-1], file_name) 801 802 803class AddrRange: 804 def __init__(self, start: int, len: int): 805 self.start = start 806 self.len = len 807 808 @property 809 def end(self) -> int: 810 return self.start + self.len 811 812 def is_in_range(self, addr: int) -> bool: 813 return addr >= self.start and addr < self.end 814 815 816class Disassembly: 817 def __init__(self): 818 self.lines: List[Tuple[str, int]] = [] 819 820 821class Objdump(object): 822 """ A wrapper of objdump to disassemble code. """ 823 824 def __init__(self, ndk_path: Optional[str], binary_finder: BinaryFinder): 825 self.ndk_path = ndk_path 826 self.binary_finder = binary_finder 827 self.readelf = ReadElf(ndk_path) 828 self.objdump_paths: Dict[str, str] = {} 829 830 def get_dso_info(self, dso_path: str, expected_build_id: Optional[str] 831 ) -> Optional[Tuple[str, str]]: 832 real_path = self.binary_finder.find_binary(dso_path, expected_build_id) 833 if not real_path: 834 return None 835 arch = self.readelf.get_arch(real_path) 836 if arch == 'unknown': 837 return None 838 return (str(real_path), arch) 839 840 def disassemble_function(self, dso_info, addr_range: AddrRange) -> Optional[Disassembly]: 841 """ Disassemble code for an addr range in a binary. 842 """ 843 real_path, arch = dso_info 844 objdump_path = self.objdump_paths.get(arch) 845 if not objdump_path: 846 objdump_path = ToolFinder.find_tool_path('llvm-objdump', self.ndk_path, arch) 847 if not objdump_path: 848 log_exit("Can't find llvm-objdump." + NDK_ERROR_MESSAGE) 849 self.objdump_paths[arch] = objdump_path 850 851 # Run objdump. 852 args = [objdump_path, '-dlC', '--no-show-raw-insn', 853 '--start-address=0x%x' % addr_range.start, 854 '--stop-address=0x%x' % (addr_range.end), 855 real_path] 856 if arch == 'arm' and 'llvm-objdump' in objdump_path: 857 args += ['--print-imm-hex'] 858 logging.debug('disassembling: %s', ' '.join(args)) 859 try: 860 subproc = subprocess.Popen(args, stdout=subprocess.PIPE) 861 (stdoutdata, _) = subproc.communicate() 862 stdoutdata = bytes_to_str(stdoutdata) 863 except OSError: 864 return None 865 866 if not stdoutdata: 867 return None 868 result = Disassembly() 869 for line in stdoutdata.split('\n'): 870 line = line.rstrip() # Remove '\r' on Windows. 871 items = line.split(':', 1) 872 try: 873 addr = int(items[0], 16) 874 except ValueError: 875 addr = 0 876 result.lines.append((line, addr)) 877 return result 878 879 def disassemble_functions(self, dso_info, sorted_addr_ranges: List[AddrRange] 880 ) -> Optional[List[Disassembly]]: 881 """ Disassemble code for multiple addr ranges in a binary. sorted_addr_ranges should be 882 sorted by addr_range.start. 883 """ 884 if not sorted_addr_ranges: 885 return [] 886 real_path, arch = dso_info 887 objdump_path = self.objdump_paths.get(arch) 888 if not objdump_path: 889 objdump_path = ToolFinder.find_tool_path('llvm-objdump', self.ndk_path, arch) 890 if not objdump_path: 891 log_exit("Can't find llvm-objdump." + NDK_ERROR_MESSAGE) 892 self.objdump_paths[arch] = objdump_path 893 894 # Run objdump. 895 start_addr = sorted_addr_ranges[0].start 896 stop_addr = max(addr_range.end for addr_range in sorted_addr_ranges) 897 args = [objdump_path, '-dlC', '--no-show-raw-insn', 898 '--start-address=0x%x' % start_addr, 899 '--stop-address=0x%x' % stop_addr, 900 real_path] 901 if arch == 'arm' and 'llvm-objdump' in objdump_path: 902 args += ['--print-imm-hex'] 903 try: 904 proc = subprocess.Popen(args, stdout=subprocess.PIPE, text=True) 905 result = self._parse_disassembly_for_functions(proc.stdout, sorted_addr_ranges) 906 proc.wait() 907 except OSError: 908 return None 909 return result 910 911 def _parse_disassembly_for_functions(self, fh: TextIO, sorted_addr_ranges: List[AddrRange]) -> Optional[List[Disassembly]]: 912 current_id = 0 913 in_range = False 914 result = [Disassembly() for _ in sorted_addr_ranges] 915 while True: 916 line = fh.readline() 917 if not line: 918 break 919 line = line.rstrip() # Remove '\r\n'. 920 addr = self._get_addr_from_disassembly_line(line) 921 if current_id >= len(sorted_addr_ranges): 922 continue 923 if addr: 924 if in_range and not sorted_addr_ranges[current_id].is_in_range(addr): 925 in_range = False 926 if not in_range: 927 # Skip addr ranges before the current address. 928 while current_id < len(sorted_addr_ranges) and sorted_addr_ranges[current_id].end <= addr: 929 current_id += 1 930 if current_id < len(sorted_addr_ranges) and sorted_addr_ranges[current_id].is_in_range(addr): 931 in_range = True 932 if in_range: 933 result[current_id].lines.append((line, addr)) 934 return result 935 936 def _get_addr_from_disassembly_line(self, line: str) -> int: 937 # line may be an instruction, like: " 24a469c: stp x29, x30, [sp, #-0x60]!" or 938 # "ffffffc0085d9664: paciasp". 939 # line may be a function start point, like "00000000024a4698 <DoWork()>:". 940 items = line.strip().split() 941 if not items: 942 return 0 943 s = items[0] 944 if s.endswith(':'): 945 s = s[:-1] 946 try: 947 return int(s, 16) 948 except ValueError: 949 return 0 950 951 952class ReadElf(object): 953 """ A wrapper of readelf. """ 954 955 def __init__(self, ndk_path: Optional[str]): 956 self.readelf_path = ToolFinder.find_tool_path('llvm-readelf', ndk_path) 957 if not self.readelf_path: 958 log_exit("Can't find llvm-readelf. " + NDK_ERROR_MESSAGE) 959 960 @staticmethod 961 def is_elf_file(path: Union[Path, str]) -> bool: 962 if os.path.isfile(path): 963 with open(path, 'rb') as fh: 964 return fh.read(4) == b'\x7fELF' 965 return False 966 967 def get_arch(self, elf_file_path: Union[Path, str]) -> str: 968 """ Get arch of an elf file. """ 969 if self.is_elf_file(elf_file_path): 970 try: 971 output = subprocess.check_output([self.readelf_path, '-h', str(elf_file_path)]) 972 output = bytes_to_str(output) 973 if output.find('AArch64') != -1: 974 return 'arm64' 975 if output.find('ARM') != -1: 976 return 'arm' 977 if output.find('X86-64') != -1: 978 return 'x86_64' 979 if output.find('80386') != -1: 980 return 'x86' 981 if output.find('RISC-V') != -1: 982 return 'riscv64' 983 except subprocess.CalledProcessError: 984 pass 985 return 'unknown' 986 987 def get_build_id(self, elf_file_path: Union[Path, str], with_padding=True) -> str: 988 """ Get build id of an elf file. """ 989 if self.is_elf_file(elf_file_path): 990 try: 991 output = subprocess.check_output([self.readelf_path, '-n', str(elf_file_path)]) 992 output = bytes_to_str(output) 993 result = re.search(r'Build ID:\s*(\S+)', output) 994 if result: 995 build_id = result.group(1) 996 if with_padding: 997 build_id = self.pad_build_id(build_id) 998 return build_id 999 except subprocess.CalledProcessError: 1000 pass 1001 return "" 1002 1003 @staticmethod 1004 def pad_build_id(build_id: str) -> str: 1005 """ Pad build id to 40 hex numbers (20 bytes). """ 1006 if len(build_id) < 40: 1007 build_id += '0' * (40 - len(build_id)) 1008 else: 1009 build_id = build_id[:40] 1010 return '0x' + build_id 1011 1012 @staticmethod 1013 def unpad_build_id(build_id: str) -> str: 1014 if build_id.startswith('0x'): 1015 build_id = build_id[2:] 1016 # Unpad build id as TrimZeroesFromBuildIDString() in quipper. 1017 padding = '0' * 8 1018 while build_id.endswith(padding): 1019 build_id = build_id[:-len(padding)] 1020 return build_id 1021 1022 def get_sections(self, elf_file_path: Union[Path, str]) -> List[str]: 1023 """ Get sections of an elf file. """ 1024 section_names: List[str] = [] 1025 if self.is_elf_file(elf_file_path): 1026 try: 1027 output = subprocess.check_output([self.readelf_path, '-SW', str(elf_file_path)]) 1028 output = bytes_to_str(output) 1029 for line in output.split('\n'): 1030 # Parse line like:" [ 1] .note.android.ident NOTE 0000000000400190 ...". 1031 result = re.search(r'^\s+\[\s*\d+\]\s(.+?)\s', line) 1032 if result: 1033 section_name = result.group(1).strip() 1034 if section_name: 1035 section_names.append(section_name) 1036 except subprocess.CalledProcessError: 1037 pass 1038 return section_names 1039 1040 1041def extant_dir(arg: str) -> str: 1042 """ArgumentParser type that only accepts extant directories. 1043 1044 Args: 1045 arg: The string argument given on the command line. 1046 Returns: The argument as a realpath. 1047 Raises: 1048 argparse.ArgumentTypeError: The given path isn't a directory. 1049 """ 1050 path = os.path.realpath(arg) 1051 if not os.path.isdir(path): 1052 raise argparse.ArgumentTypeError('{} is not a directory.'.format(path)) 1053 return path 1054 1055 1056def extant_file(arg: str) -> str: 1057 """ArgumentParser type that only accepts extant files. 1058 1059 Args: 1060 arg: The string argument given on the command line. 1061 Returns: The argument as a realpath. 1062 Raises: 1063 argparse.ArgumentTypeError: The given path isn't a file. 1064 """ 1065 path = os.path.realpath(arg) 1066 if not os.path.isfile(path): 1067 raise argparse.ArgumentTypeError('{} is not a file.'.format(path)) 1068 return path 1069 1070 1071def log_fatal(msg: str): 1072 raise Exception(msg) 1073 1074 1075def log_exit(msg: str): 1076 sys.exit(msg) 1077 1078 1079class LogFormatter(logging.Formatter): 1080 """ Use custom logging format. """ 1081 1082 def __init__(self): 1083 super().__init__('%(asctime)s [%(levelname)s] (%(filename)s:%(lineno)d) %(message)s') 1084 1085 def formatTime(self, record, datefmt): 1086 return super().formatTime(record, '%H:%M:%S') + ',%03d' % record.msecs 1087 1088 1089class Log: 1090 initialized = False 1091 1092 @classmethod 1093 def init(cls, log_level: str = 'info'): 1094 assert not cls.initialized 1095 cls.initialized = True 1096 cls.logger = logging.root 1097 cls.logger.setLevel(log_level.upper()) 1098 handler = logging.StreamHandler() 1099 handler.setFormatter(LogFormatter()) 1100 cls.logger.addHandler(handler) 1101 1102 1103class ArgParseFormatter( 1104 argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptionHelpFormatter): 1105 pass 1106 1107 1108@dataclass 1109class ReportLibOptions: 1110 show_art_frames: bool 1111 remove_method: List[str] 1112 trace_offcpu: str 1113 proguard_mapping_files: List[str] 1114 sample_filters: List[str] 1115 aggregate_threads: List[str] 1116 1117 1118class BaseArgumentParser(argparse.ArgumentParser): 1119 def __init__(self, *args, **kwargs): 1120 super().__init__(*args, **kwargs, formatter_class=ArgParseFormatter) 1121 self.has_sample_filter_options = False 1122 self.sample_filter_with_pid_shortcut = False 1123 self.has_report_lib_options = False 1124 1125 def add_report_lib_options(self, group: Optional[Any] = None, 1126 default_show_art_frames: bool = False, 1127 sample_filter_group: Optional[Any] = None, 1128 sample_filter_with_pid_shortcut: bool = True): 1129 self.has_report_lib_options = True 1130 parser = group if group else self 1131 parser.add_argument( 1132 '--proguard-mapping-file', nargs='+', 1133 help='Add proguard mapping file to de-obfuscate symbols') 1134 parser.add_argument('--show-art-frames', '--show_art_frames', 1135 action=argparse.BooleanOptionalAction, default=default_show_art_frames, 1136 help='Show frames of internal methods in the ART Java interpreter.') 1137 parser.add_argument('--remove-method', nargs='+', metavar='method_name_regex', 1138 help='remove methods with name containing the regular expression') 1139 parser.add_argument( 1140 '--trace-offcpu', choices=['on-cpu', 'off-cpu', 'on-off-cpu', 'mixed-on-off-cpu'], 1141 help="""Set report mode for profiles recorded with --trace-offcpu option. All possible 1142 modes are: on-cpu (only on-cpu samples), off-cpu (only off-cpu samples), 1143 on-off-cpu (both on-cpu and off-cpu samples, can be split by event name), 1144 mixed-on-off-cpu (on-cpu and off-cpu samples using the same event name). 1145 If not set, mixed-on-off-cpu mode is used. 1146 """) 1147 self._add_sample_filter_options(sample_filter_group, sample_filter_with_pid_shortcut) 1148 parser.add_argument( 1149 '--aggregate-threads', nargs='+', metavar='thread_name_regex', 1150 help="""Aggregate threads with names matching the same regex. As a result, samples from 1151 different threads (like a thread pool) can be shown in one flamegraph. 1152 """) 1153 1154 def _add_sample_filter_options( 1155 self, group: Optional[Any] = None, with_pid_shortcut: bool = True): 1156 if not group: 1157 group = self.add_argument_group('Sample filter options') 1158 group.add_argument('--cpu', nargs='+', help="""only include samples for the selected cpus. 1159 cpu can be a number like 1, or a range like 0-3""") 1160 group.add_argument('--exclude-pid', metavar='pid', nargs='+', type=int, 1161 help='exclude samples for selected processes') 1162 group.add_argument('--exclude-tid', metavar='tid', nargs='+', type=int, 1163 help='exclude samples for selected threads') 1164 group.add_argument( 1165 '--exclude-process-name', metavar='process_name_regex', nargs='+', 1166 help='exclude samples for processes with name containing the regular expression') 1167 group.add_argument( 1168 '--exclude-thread-name', metavar='thread_name_regex', nargs='+', 1169 help='exclude samples for threads with name containing the regular expression') 1170 1171 if with_pid_shortcut: 1172 group.add_argument('--pid', metavar='pid', nargs='+', type=int, 1173 help='only include samples for selected processes') 1174 group.add_argument('--tid', metavar='tid', nargs='+', type=int, 1175 help='only include samples for selected threads') 1176 group.add_argument('--include-pid', metavar='pid', nargs='+', type=int, 1177 help='only include samples for selected processes') 1178 group.add_argument('--include-tid', metavar='tid', nargs='+', type=int, 1179 help='only include samples for selected threads') 1180 group.add_argument( 1181 '--include-process-name', metavar='process_name_regex', nargs='+', 1182 help='only include samples for processes with name containing the regular expression') 1183 group.add_argument( 1184 '--comm', '--include-thread-name', metavar='thread_name_regex', 1185 dest='include_thread_name', nargs='+', 1186 help='only include samples for threads with name containing the regular expression') 1187 group.add_argument( 1188 '--filter-file', metavar='file', 1189 help='use filter file to filter samples based on timestamps. ' + 1190 'The file format is in doc/sampler_filter.md.') 1191 self.has_sample_filter_options = True 1192 self.sample_filter_with_pid_shortcut = with_pid_shortcut 1193 1194 def _build_sample_filter(self, args: argparse.Namespace) -> List[str]: 1195 """ Build sample filters, which can be passed to ReportLib.SetSampleFilter(). """ 1196 filters = [] 1197 if args.cpu: 1198 filters.extend(['--cpu', ','.join(args.cpu)]) 1199 if args.exclude_pid: 1200 filters.extend(['--exclude-pid', ','.join(str(pid) for pid in args.exclude_pid)]) 1201 if args.exclude_tid: 1202 filters.extend(['--exclude-tid', ','.join(str(tid) for tid in args.exclude_tid)]) 1203 if args.exclude_process_name: 1204 for name in args.exclude_process_name: 1205 filters.extend(['--exclude-process-name', name]) 1206 if args.exclude_thread_name: 1207 for name in args.exclude_thread_name: 1208 filters.extend(['--exclude-thread-name', name]) 1209 1210 if args.include_pid: 1211 filters.extend(['--include-pid', ','.join(str(pid) for pid in args.include_pid)]) 1212 if args.include_tid: 1213 filters.extend(['--include-tid', ','.join(str(tid) for tid in args.include_tid)]) 1214 if self.sample_filter_with_pid_shortcut: 1215 if args.pid: 1216 filters.extend(['--include-pid', ','.join(str(pid) for pid in args.pid)]) 1217 if args.tid: 1218 filters.extend(['--include-tid', ','.join(str(pid) for pid in args.tid)]) 1219 if args.include_process_name: 1220 for name in args.include_process_name: 1221 filters.extend(['--include-process-name', name]) 1222 if args.include_thread_name: 1223 for name in args.include_thread_name: 1224 filters.extend(['--include-thread-name', name]) 1225 if args.filter_file: 1226 filters.extend(['--filter-file', args.filter_file]) 1227 return filters 1228 1229 def parse_known_args(self, *args, **kwargs): 1230 self.add_argument( 1231 '--log', choices=['debug', 'info', 'warning'], 1232 default='info', help='set log level') 1233 namespace, left_args = super().parse_known_args(*args, **kwargs) 1234 1235 if self.has_report_lib_options: 1236 sample_filters = self._build_sample_filter(namespace) 1237 report_lib_options = ReportLibOptions( 1238 namespace.show_art_frames, namespace.remove_method, namespace.trace_offcpu, 1239 namespace.proguard_mapping_file, sample_filters, namespace.aggregate_threads) 1240 setattr(namespace, 'report_lib_options', report_lib_options) 1241 1242 if not Log.initialized: 1243 Log.init(namespace.log) 1244 return namespace, left_args 1245