#!/usr/bin/env python3
#
# Copyright (C) 2021 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""gecko_profile_generator.py: converts perf.data to Gecko Profile Format,
    which can be read by https://profiler.firefox.com/.

  Example:
    ./app_profiler.py
    ./gecko_profile_generator.py | gzip > gecko-profile.json.gz

  Then open gecko-profile.json.gz in https://profiler.firefox.com/
"""

from collections import Counter
from dataclasses import dataclass, field
from enum import Enum, unique
import json
import logging
import sys
from typing import List, Dict, Optional, NamedTuple, Tuple

from simpleperf_report_lib import GetReportLib
from simpleperf_utils import BaseArgumentParser, ReportLibOptions


StringID = int
StackID = int
FrameID = int
CategoryID = int
Milliseconds = float
GeckoProfile = Dict

# DSO name suffixes that StackFrame.category() maps to Category.DEX.
# APKs are full of dex code, and /system/framework/ has .jar files which seem
# to be full of .dex code.
_DEX_DSO_SUFFIXES = ('.vdex', '.apk', '.jar')
# DSO name suffixes that StackFrame.category() maps to Category.OAT.
# In ART, odex is just OAT code.
_OAT_DSO_SUFFIXES = ('.oat', '.odex')
# Symbols whose presence in a callstack marks the stack as complete.
_STACK_ROOT_SYMBOLS = ('__libc_init', '__start_thread')


# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L156
class GeckoFrame(NamedTuple):
    """One row of a thread's frameTable.

    The field order matches the "frameTable" schema emitted by
    Thread.to_json_dict(); the first field is exposed there as "location".
    """
    string_id: StringID
    relevantForJS: bool
    innerWindowID: int
    implementation: None
    optimizations: None
    line: None
    column: None
    category: CategoryID
    subcategory: int


# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L216
class GeckoStack(NamedTuple):
    """One row of a thread's stackTable: a leaf frame plus the ID of its prefix stack."""
    prefix_id: Optional[StackID]
    frame_id: FrameID
    category_id: CategoryID


# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L90
class GeckoSample(NamedTuple):
    """One profile sample of a thread.

    complete_stack is bookkeeping for Thread.remove_stack_gaps() only; it is
    not part of the JSON row produced by to_json().
    """
    stack_id: Optional[StackID]
    time_ms: Milliseconds
    responsiveness: int
    complete_stack: bool

    def to_json(self):
        """Returns the sample as a [stack, time, responsiveness] row."""
        return [self.stack_id, self.time_ms, self.responsiveness]


# Schema: https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/profile.js#L425
# Colors must be defined in:
# https://github.com/firefox-devtools/profiler/blob/50124adbfa488adba6e2674a8f2618cf34b59cd2/res/css/categories.css
@unique
class Category(Enum):
    """Frame categories. Each member is declared as (value, label, color).

    The integer value is what gets stored in GeckoFrame.category, and
    to_json() emits the members in declaration order.
    """
    # Follow Brendan Gregg's Flamegraph convention: yellow for userland
    # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L419
    USER = 0, 'User', 'yellow'
    # Follow Brendan Gregg's Flamegraph convention: orange for kernel
    # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L417
    KERNEL = 1, 'Kernel', 'orange'
    # Follow Brendan Gregg's Flamegraph convention: yellow for userland
    # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L419
    NATIVE = 2, 'Native', 'yellow'
    # Follow Brendan Gregg's Flamegraph convention: green for Java/JIT
    # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L411
    DEX = 3, 'DEX', 'green'
    OAT = 4, 'OAT', 'green'
    # Follow Brendan Gregg's Flamegraph convention: blue for off-CPU
    # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L470
    OFF_CPU = 5, 'Off-CPU', 'blue'
    # Not used by this exporter yet, but some Firefox Profiler code assumes
    # there is an 'Other' category by searching for a category with
    # color=grey, so include this.
    OTHER = 6, 'Other', 'grey'
    # Follow Brendan Gregg's Flamegraph convention: green for Java/JIT
    # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L411
    JIT = 7, 'JIT', 'green'

    def __init__(self, value, label, color):
        # Use only the integer as the enum value; keep label and color as
        # plain attributes.
        self._value_ = value
        self.label = label
        self.color = color

    @classmethod
    def to_json(cls):
        """Returns the list of category dicts for the profile's meta section."""
        return [{
            "name": category.label,
            "color": category.color,
            # We don't use subcategories, but Firefox Profiler seems to require it.
            "subcategories": ['Other']
        } for category in cls]


@dataclass
class StackFrame:
    """A symbolized stack frame: a symbol name and the DSO it belongs to."""
    symbol_name: str
    dso_name: str

    def to_gecko_frame_string(self):
        """Returns the frame's display string, which is also its interning key."""
        return f'{self.symbol_name} (in {self.dso_name})'

    def category(self) -> Category:
        """Classifies the frame by heuristics on its DSO name (and symbol name)."""
        dso_name = self.dso_name
        # Heuristic: kernel code contains "kallsyms" as the library name.
        if dso_name == "[kernel.kallsyms]" or dso_name.endswith(".ko"):
            # Heuristic: empirically, off-CPU profiles mostly measure off-CPU
            # time accounted to the linux kernel __schedule function, which
            # handles blocking. This only works if we have kernel symbol
            # (kallsyms) access though. __schedule defined here:
            # https://cs.android.com/android/kernel/superproject/+/common-android-mainline:common/kernel/sched/core.c;l=6593;drc=0c99414a07ddaa18d8eb4be90b551d2687cbde2f
            if self.symbol_name == "__schedule":
                return Category.OFF_CPU
            return Category.KERNEL
        if dso_name.endswith(".so"):
            return Category.NATIVE
        if dso_name.endswith(_DEX_DSO_SUFFIXES):
            return Category.DEX
        if dso_name.endswith(_OAT_DSO_SUFFIXES):
            return Category.OAT
        # "[JIT app cache]" is returned for JIT code here:
        # https://cs.android.com/android/platform/superproject/+/master:system/extras/simpleperf/dso.cpp;l=551;drc=4d8137f55782cc1e8cc93e4694ba3a7159d9a2bc
        if dso_name == "[JIT app cache]":
            return Category.JIT
        return Category.USER


def is_complete_stack(stack: List[StackFrame]) -> bool:
    """Check if the callstack is complete. The stack starts from root.

    A stack counts as complete when any of its frames is named `__libc_init`
    or `__start_thread`. An empty stack is not complete.
    """
    return any(frame.symbol_name in _STACK_ROOT_SYMBOLS for frame in stack)


@dataclass
class Thread:
    """A builder for a profile of a single thread.

    Attributes:
        comm: Thread command-line (name).
        pid: process ID of containing process.
        tid: thread ID.
        samples: Timeline of profile samples.
        frameTable: interned stack frame ID -> stack frame.
        stringTable: interned string ID -> string.
        stringMap: interned string -> string ID.
        stackTable: interned stack ID -> stack.
        stackMap: (stack prefix ID, leaf stack frame ID) -> interned Stack ID.
        frameMap: Stack Frame string -> interned Frame ID.
    """
    comm: str
    pid: int
    tid: int
    samples: List[GeckoSample] = field(default_factory=list)
    frameTable: List[GeckoFrame] = field(default_factory=list)
    stringTable: List[str] = field(default_factory=list)
    # TODO: this is redundant with frameTable, could we remove this?
    stringMap: Dict[str, int] = field(default_factory=dict)
    stackTable: List[GeckoStack] = field(default_factory=list)
    stackMap: Dict[Tuple[Optional[int], int], int] = field(default_factory=dict)
    frameMap: Dict[str, int] = field(default_factory=dict)

    def _intern_stack(self, frame_id: int, prefix_id: Optional[int]) -> int:
        """Gets a matching stack, or saves the new stack. Returns a Stack ID."""
        key = (prefix_id, frame_id)
        stack_id = self.stackMap.get(key)
        if stack_id is None:
            # The new ID is the row index the stack is about to occupy.
            stack_id = len(self.stackTable)
            self.stackTable.append(GeckoStack(
                prefix_id=prefix_id,
                frame_id=frame_id,
                category_id=0,
            ))
            self.stackMap[key] = stack_id
        return stack_id

    def _intern_string(self, string: str) -> int:
        """Gets a matching string, or saves the new string. Returns a String ID."""
        string_id = self.stringMap.get(string)
        if string_id is None:
            string_id = len(self.stringTable)
            self.stringTable.append(string)
            self.stringMap[string] = string_id
        return string_id

    def _intern_frame(self, frame: StackFrame) -> int:
        """Gets a matching stack frame, or saves the new frame. Returns a Frame ID."""
        frame_str = frame.to_gecko_frame_string()
        frame_id = self.frameMap.get(frame_str)
        if frame_id is not None:
            return frame_id
        frame_id = len(self.frameTable)
        self.frameMap[frame_str] = frame_id
        self.frameTable.append(GeckoFrame(
            string_id=self._intern_string(frame_str),
            relevantForJS=False,
            innerWindowID=0,
            implementation=None,
            optimizations=None,
            line=None,
            column=None,
            category=frame.category().value,
            subcategory=0,
        ))
        return frame_id

    def add_sample(self, comm: str, stack: List[StackFrame], time_ms: Milliseconds) -> None:
        """Add a timestamped stack trace sample to the thread builder.

        Args:
            comm: command-line (name) of the thread at this sample
            stack: sampled stack frames. Root first, leaf last.
            time_ms: timestamp of sample in milliseconds
        """
        # Unix threads often don't set their name immediately upon creation.
        # Use the last name
        self.comm = comm

        # Intern every prefix of the stack, root first. The ID of the last
        # (leaf) prefix identifies the whole stack; it stays None when the
        # stack is empty.
        stack_id = None
        for frame in stack:
            stack_id = self._intern_stack(self._intern_frame(frame), stack_id)

        self.samples.append(GeckoSample(
            stack_id=stack_id,
            time_ms=time_ms,
            responsiveness=0,
            complete_stack=is_complete_stack(stack),
        ))

    def sort_samples(self) -> None:
        """ The samples aren't guaranteed to be in order. Sort them by time. """
        self.samples.sort(key=lambda s: s.time_ms)

    def remove_stack_gaps(self, max_remove_gap_length: int, gap_distr: Dict[int, int]) -> None:
        """ Ideally all callstacks are complete. But some may be broken for different reasons.
            To create a smooth view in "Stack Chart", remove small gaps of broken callstacks.

        A gap is a maximal run of consecutive samples whose complete_stack is
        False. Every gap is counted in gap_distr, whether or not it is removed.

        Args:
            max_remove_gap_length: the max length of continuous broken-stack samples to remove.
                0 disables the pass entirely (gap_distr is left untouched).
            gap_distr: gap length -> number of gaps of that length, updated in place.
                Any mapping works; missing lengths are treated as 0.
        """
        if max_remove_gap_length == 0:
            return
        samples = self.samples
        sample_count = len(samples)
        keep = [True] * sample_count
        start = 0
        while start < sample_count:
            if samples[start].complete_stack:
                start += 1
                continue
            # Find the end (exclusive) of this run of broken-stack samples.
            end = start + 1
            while end < sample_count and not samples[end].complete_stack:
                end += 1
            gap_length = end - start
            # .get() rather than += so that a plain dict, as the annotation
            # allows, does not raise KeyError on a first-seen length.
            gap_distr[gap_length] = gap_distr.get(gap_length, 0) + 1
            if gap_length <= max_remove_gap_length:
                keep[start:end] = [False] * gap_length
            start = end
        if not all(keep):
            self.samples = [s for s, kept in zip(samples, keep) if kept]

    def to_json_dict(self) -> Dict:
        """Converts this Thread to GeckoThread JSON format."""

        # Gecko profile format is row-oriented data as List[List],
        # And a schema for interpreting each index.
        # Schema:
        # https://github.com/firefox-devtools/profiler/blob/main/docs-developer/gecko-profile-format.md
        # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L230
        return {
            "tid": self.tid,
            "pid": self.pid,
            "name": self.comm,
            # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L51
            "markers": {
                "schema": {
                    "name": 0,
                    "startTime": 1,
                    "endTime": 2,
                    "phase": 3,
                    "category": 4,
                    "data": 5,
                },
                "data": [],
            },
            # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L90
            "samples": {
                "schema": {
                    "stack": 0,
                    "time": 1,
                    "responsiveness": 2,
                },
                "data": [s.to_json() for s in self.samples],
            },
            # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L156
            "frameTable": {
                "schema": {
                    "location": 0,
                    "relevantForJS": 1,
                    "innerWindowID": 2,
                    "implementation": 3,
                    "optimizations": 4,
                    "line": 5,
                    "column": 6,
                    "category": 7,
                    "subcategory": 8,
                },
                "data": self.frameTable,
            },
            # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L216
            "stackTable": {
                "schema": {
                    "prefix": 0,
                    "frame": 1,
                    "category": 2,
                },
                "data": self.stackTable,
            },
            "stringTable": self.stringTable,
            "registerTime": 0,
            "unregisterTime": None,
            "processType": "default",
        }


def remove_stack_gaps(max_remove_gap_length: int, thread_map: Dict[int, Thread]) -> None:
    """ Remove stack gaps for each thread, and print status.

    Threads left with no samples are deleted from thread_map.

    Args:
        max_remove_gap_length: the max length of continuous broken-stack samples to remove.
            0 disables the pass.
        thread_map: thread builders keyed by ID, modified in place.
    """
    if max_remove_gap_length == 0:
        return
    total_sample_count = 0
    remove_sample_count = 0
    gap_distr: Dict[int, int] = Counter()
    # Iterate over a snapshot of the keys, because entries may be deleted below.
    for tid in list(thread_map):
        thread = thread_map[tid]
        old_n = len(thread.samples)
        thread.remove_stack_gaps(max_remove_gap_length, gap_distr)
        new_n = len(thread.samples)
        total_sample_count += old_n
        remove_sample_count += old_n - new_n
        if new_n == 0:
            del thread_map[tid]
    if total_sample_count != 0:
        logging.info('Remove stack gaps with length <= %d. %d (%.2f%%) samples are removed.',
                     max_remove_gap_length, remove_sample_count,
                     remove_sample_count / total_sample_count * 100
                     )
        logging.debug('Stack gap length distribution among samples (gap_length: count): %s',
                      gap_distr)


def _gecko_profile(
        record_file: str,
        symfs_dir: Optional[str],
        kallsyms_file: Optional[str],
        report_lib_options: ReportLibOptions,
        max_remove_gap_length: int,
        percpu_samples: bool) -> GeckoProfile:
    """convert a simpleperf profile to gecko format

    Args:
        record_file: path of the recording passed to GetReportLib().
        symfs_dir: if not None, passed to lib.SetSymfs().
        kallsyms_file: if not None, passed to lib.SetKallsymsFile().
        report_lib_options: passed to lib.SetReportOptions(). When
            percpu_samples is set and the recording supports trace-offcpu
            modes, its trace_offcpu attribute is overwritten with 'on-cpu'.
        max_remove_gap_length: see remove_stack_gaps().
        percpu_samples: group samples into one timeline per cpu instead of
            one per thread.

    Returns:
        The profile as a JSON-serializable dict.
    """
    lib = GetReportLib(record_file)
    # Map from tid (or cpu, with percpu_samples) to Thread
    thread_map: Dict[int, Thread] = {}
    # Map from pid to process name
    process_names: Dict[int, str] = {}

    # Close the report lib even if reading the recording fails part-way.
    try:
        lib.ShowIpForUnknownSymbol()
        if symfs_dir is not None:
            lib.SetSymfs(symfs_dir)
        if kallsyms_file is not None:
            lib.SetKallsymsFile(kallsyms_file)
        if percpu_samples:
            # Grouping samples by cpus doesn't support off cpu samples.
            if lib.GetSupportedTraceOffCpuModes():
                report_lib_options.trace_offcpu = 'on-cpu'
        lib.SetReportOptions(report_lib_options)

        arch = lib.GetArch()
        meta_info = lib.MetaInfo()
        record_cmd = lib.GetRecordCmd()

        while True:
            sample = lib.GetNextSample()
            if sample is None:
                break
            symbol = lib.GetSymbolOfCurrentSample()
            callchain = lib.GetCallChainOfCurrentSample()
            # NOTE(review): dividing by 1e6 presumes sample.time is in
            # nanoseconds - confirm against the report lib.
            sample_time_ms = sample.time / 1000000
            stack: List[StackFrame] = [
                StackFrame(symbol.symbol_name, symbol.dso_name),
            ]
            # Only the first callchain.nr entries are read, by index.
            for i in range(callchain.nr):
                entry = callchain.entries[i]
                stack.append(StackFrame(
                    symbol_name=entry.symbol.symbol_name,
                    dso_name=entry.symbol.dso_name,
                ))
            # We want root first, leaf last.
            stack.reverse()

            if percpu_samples:
                # A process is named after the thread whose tid equals its pid.
                if sample.tid == sample.pid:
                    process_names[sample.pid] = sample.thread_comm
                # NOTE(review): this is None (rendered as "None") until a
                # sample with tid == pid has been seen for this process.
                process_name = process_names.get(sample.pid)
                # This is a synthetic stack frame, these aren't real symbols or
                # DSOs, but they show up nicely in the UI.
                thread_frame = StackFrame(
                    symbol_name='%s tid %d' % (sample.thread_comm, sample.tid),
                    dso_name='%s pid %d' % (process_name, sample.pid),
                )
                stack = [thread_frame] + stack
                # One timeline per cpu: the cpu number stands in for both
                # the pid and the tid.
                key = sample.cpu
                pid = sample.cpu
                comm = f'Cpu {sample.cpu}'
            else:
                key = sample.tid
                pid = sample.pid
                comm = sample.thread_comm

            thread = thread_map.get(key)
            if thread is None:
                thread = Thread(comm=comm, pid=pid, tid=key)
                thread_map[key] = thread
            thread.add_sample(
                comm=comm,
                stack=stack,
                # We are being a bit fast and loose here with time here. simpleperf
                # uses CLOCK_MONOTONIC by default, which doesn't use the normal unix
                # epoch, but rather some arbitrary time. In practice, this doesn't
                # matter, the Firefox Profiler normalises all the timestamps to begin at
                # the minimum time. Consider fixing this in future, if needed, by
                # setting `simpleperf record --clockid realtime`.
                time_ms=sample_time_ms)
    finally:
        lib.Close()

    for thread in thread_map.values():
        thread.sort_samples()

    remove_stack_gaps(max_remove_gap_length, thread_map)

    threads = [thread.to_json_dict() for thread in thread_map.values()]

    profile_timestamp = meta_info.get('timestamp')
    end_time_ms = (int(profile_timestamp) * 1000) if profile_timestamp else 0

    # Schema: https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L305
    gecko_profile_meta = {
        "interval": 1,
        "processType": 0,
        "product": record_cmd,
        "device": meta_info.get("product_props"),
        "platform": meta_info.get("android_build_fingerprint"),
        "stackwalk": 1,
        "debug": 0,
        "gcpoison": 0,
        "asyncstack": 1,
        # The profile timestamp is actually the end time, not the start time.
        # This is close enough for our purposes; I mostly just want to know which
        # day the profile was taken! Consider fixing this in future, if needed,
        # by setting `simpleperf record --clockid realtime` and taking the minimum
        # sample time.
        "startTime": end_time_ms,
        "shutdownTime": None,
        "version": 24,
        "presymbolicated": True,
        "categories": Category.to_json(),
        "markerSchema": [],
        "abi": arch,
        "oscpu": meta_info.get("android_build_fingerprint"),
        "appBuildID": meta_info.get("app_versioncode"),
    }

    # Schema:
    # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L377
    # https://github.com/firefox-devtools/profiler/blob/main/docs-developer/gecko-profile-format.md
    return {
        "meta": gecko_profile_meta,
        "libs": [],
        "threads": threads,
        "processes": [],
        "pausedRanges": [],
    }


def main() -> None:
    """Parses the command line, converts the recording and writes the JSON profile to stdout."""
    parser = BaseArgumentParser(description=__doc__)
    parser.add_argument('--symfs',
                        help='Set the path to find binaries with symbols and debug info.')
    parser.add_argument('--kallsyms', help='Set the path to find kernel symbols.')
    parser.add_argument('-i', '--record_file', nargs='?', default='perf.data',
                        help='Default is perf.data.')
    parser.add_argument('--remove-gaps', metavar='MAX_GAP_LENGTH', dest='max_remove_gap_length',
                        type=int, default=3, help="""
                        Ideally all callstacks are complete. But some may be broken for different
                        reasons. To create a smooth view in "Stack Chart", remove small gaps of
                        broken callstacks. MAX_GAP_LENGTH is the max length of continuous
                        broken-stack samples we want to remove.
                        """
                        )
    parser.add_argument(
        '--percpu-samples', action='store_true',
        help='show samples based on cpus instead of threads')
    parser.add_report_lib_options()
    args = parser.parse_args()
    profile = _gecko_profile(
        record_file=args.record_file,
        symfs_dir=args.symfs,
        kallsyms_file=args.kallsyms,
        report_lib_options=args.report_lib_options,
        max_remove_gap_length=args.max_remove_gap_length,
        percpu_samples=args.percpu_samples,
    )

    json.dump(profile, sys.stdout, sort_keys=True)


if __name__ == '__main__':
    main()