xref: /aosp_15_r20/system/extras/simpleperf/scripts/gecko_profile_generator.py (revision 288bf5226967eb3dac5cce6c939ccc2a7f2b4fe5)
1#!/usr/bin/env python3
2#
3# Copyright (C) 2021 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18"""gecko_profile_generator.py: converts perf.data to Gecko Profile Format,
19    which can be read by https://profiler.firefox.com/.
20
21  Example:
22    ./app_profiler.py
23    ./gecko_profile_generator.py | gzip > gecko-profile.json.gz
24
25  Then open gecko-profile.json.gz in https://profiler.firefox.com/
26"""
27
28from collections import Counter
29from dataclasses import dataclass, field
30from enum import Enum, unique
31import json
32import logging
33import sys
34from typing import List, Dict, Optional, NamedTuple, Tuple
35
36from simpleperf_report_lib import GetReportLib
37from simpleperf_utils import BaseArgumentParser, ReportLibOptions
38
39
# Readable aliases for the primitive types stored in the Gecko profile tables.
StringID = int  # index into a Thread's stringTable
StackID = int  # index into a Thread's stackTable
FrameID = int  # index into a Thread's frameTable
CategoryID = int  # integer value of a Category member
Milliseconds = float  # sample timestamp, in milliseconds
GeckoProfile = Dict  # JSON-serializable profile dict returned by _gecko_profile()
46
47
48# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L156
class GeckoFrame(NamedTuple):
    """One row of a thread's Gecko ``frameTable``.

    Rows are handed to the JSON encoder as-is, so the field order here must
    stay in step with the ``frameTable`` schema written by
    ``Thread.to_json_dict``.
    """
    # Index of the frame's display string in the owning thread's string table.
    string_id: StringID
    relevantForJS: bool
    innerWindowID: int
    # The next four columns are always None in the rows this exporter builds.
    implementation: None
    optimizations: None
    line: None
    column: None
    # Integer value of the Category the frame was classified as.
    category: CategoryID
    subcategory: int
59
60
61# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L216
class GeckoStack(NamedTuple):
    """One row of a thread's Gecko ``stackTable``.

    A call stack is stored as a linked list: each row names its leaf frame
    and points at the row describing the rest of the stack (the prefix).
    """
    # Stack ID of the caller chain, or None when this frame is the root.
    prefix_id: Optional[StackID]
    # Frame ID of the leaf frame of this stack.
    frame_id: FrameID
    category_id: CategoryID
66
67
68# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L90
class GeckoSample(NamedTuple):
    """A single timestamped sample belonging to one thread.

    ``complete_stack`` is bookkeeping used while removing stack gaps; it is
    not part of the exported sample row.
    """
    stack_id: Optional[StackID]
    time_ms: Milliseconds
    responsiveness: int
    complete_stack: bool

    def to_json(self):
        """Return the exported row: [stack, time, responsiveness]."""
        exported = (self.stack_id, self.time_ms, self.responsiveness)
        return list(exported)
77
78
79# Schema: https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/profile.js#L425
80# Colors must be defined in:
81# https://github.com/firefox-devtools/profiler/blob/50124adbfa488adba6e2674a8f2618cf34b59cd2/res/css/categories.css
@unique
class Category(Enum):
    """Frame categories shown by Firefox Profiler.

    Each member is declared as (value, label, color). The integer value is
    what frames store as their category; ``to_json`` emits the members in
    declaration order.
    """
    # Follow Brendan Gregg's Flamegraph convention: yellow for userland
    # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L419
    USER = 0, 'User', 'yellow'
    # Follow Brendan Gregg's Flamegraph convention: orange for kernel
    # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L417
    KERNEL = 1, 'Kernel', 'orange'
    # Follow Brendan Gregg's Flamegraph convention: yellow for userland
    # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L419
    NATIVE = 2, 'Native', 'yellow'
    # Follow Brendan Gregg's Flamegraph convention: green for Java/JIT
    # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L411
    DEX = 3, 'DEX', 'green'
    OAT = 4, 'OAT', 'green'
    # Follow Brendan Gregg's Flamegraph convention: blue for off-CPU
    # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L470
    OFF_CPU = 5, 'Off-CPU', 'blue'
    # Not used by this exporter yet, but some Firefox Profiler code assumes
    # there is an 'Other' category by searching for a category with
    # color=grey, so include this.
    OTHER = 6, 'Other', 'grey'
    # Follow Brendan Gregg's Flamegraph convention: green for Java/JIT
    # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L411
    JIT = 7, 'JIT', 'green'

    def __init__(self, value, label, color):
        # Unpack the declared tuple; only the integer becomes the enum value.
        self.label = label
        self.color = color
        self._value_ = value

    @classmethod
    def to_json(cls):
        """Return the category list for the profile's meta section."""
        categories = []
        for member in cls:
            categories.append({
                "name": member.label,
                "color": member.color,
                # We don't use subcategories, but Firefox Profiler seems to require it.
                "subcategories": ['Other'],
            })
        return categories
121
122
@dataclass
class StackFrame:
    """A symbolized stack frame: a symbol name and the DSO it belongs to."""
    symbol_name: str
    dso_name: str

    def to_gecko_frame_string(self):
        """Return the display string used for this frame in the profile."""
        return '%s (in %s)' % (self.symbol_name, self.dso_name)

    def category(self) -> Category:
        """Classify the frame from its DSO name (and symbol, for the kernel)."""
        dso = self.dso_name

        # Heuristic: kernel code contains "kallsyms" as the library name.
        if dso == "[kernel.kallsyms]" or dso.endswith(".ko"):
            # Heuristic: empirically, off-CPU profiles mostly measure off-CPU
            # time accounted to the linux kernel __schedule function, which
            # handles blocking. This only works if we have kernel symbol
            # (kallsyms) access though.  __schedule defined here:
            # https://cs.android.com/android/kernel/superproject/+/common-android-mainline:common/kernel/sched/core.c;l=6593;drc=0c99414a07ddaa18d8eb4be90b551d2687cbde2f
            if self.symbol_name == "__schedule":
                return Category.OFF_CPU
            return Category.KERNEL

        if dso.endswith(".so"):
            return Category.NATIVE

        # APKs are full of dex code, and /system/framework/ has .jar files
        # which seem to be full of .dex code.
        if dso.endswith((".vdex", ".apk", ".jar")):
            return Category.DEX

        # In ART, odex is just OAT code
        if dso.endswith((".oat", ".odex")):
            return Category.OAT

        # "[JIT app cache]" is returned for JIT code here:
        # https://cs.android.com/android/platform/superproject/+/master:system/extras/simpleperf/dso.cpp;l=551;drc=4d8137f55782cc1e8cc93e4694ba3a7159d9a2bc
        if dso == "[JIT app cache]":
            return Category.JIT

        return Category.USER
162
163
def is_complete_stack(stack: List[StackFrame]) -> bool:
    """Tell whether a callstack reaches a known thread entry point.

    A stack counts as complete when any of its frames is `__libc_init` or
    `__start_thread`. An empty stack is never complete.
    """
    entry_symbols = ('__libc_init', '__start_thread')
    return any(frame.symbol_name in entry_symbols for frame in stack)
170
171
@dataclass
class Thread:
    """A builder for a profile of a single thread.

    Samples are appended with add_sample(). Frames, stacks and strings are
    interned into per-thread tables so that each distinct value is stored
    once and referenced by its index; to_json_dict() emits those tables.

    Attributes:
      comm: Thread command-line (name).
      pid: process ID of containing process.
      tid: thread ID.
      samples: Timeline of profile samples.
      frameTable: interned stack frame ID -> stack frame.
      stringTable: interned string ID -> string.
      stringMap: interned string -> string ID.
      stackTable: interned stack ID -> stack.
      stackMap: (stack prefix ID, leaf stack frame ID) -> interned Stack ID.
      frameMap: Stack Frame string -> interned Frame ID.
    """
    comm: str
    pid: int
    tid: int
    samples: List[GeckoSample] = field(default_factory=list)
    frameTable: List[GeckoFrame] = field(default_factory=list)
    stringTable: List[str] = field(default_factory=list)
    # TODO: this is redundant with frameTable, could we remove this?
    stringMap: Dict[str, int] = field(default_factory=dict)
    stackTable: List[GeckoStack] = field(default_factory=list)
    stackMap: Dict[Tuple[Optional[int], int], int] = field(default_factory=dict)
    frameMap: Dict[str, int] = field(default_factory=dict)

    def _intern_stack(self, frame_id: int, prefix_id: Optional[int]) -> int:
        """Gets a matching stack, or saves the new stack. Returns a Stack ID.

        Args:
          frame_id: Frame ID of the leaf frame of the stack.
          prefix_id: Stack ID of the rest of the stack, or None for a root.
        """
        key = (prefix_id, frame_id)
        stack_id = self.stackMap.get(key)
        if stack_id is not None:
            return stack_id
        stack_id = len(self.stackTable)
        # The stack row's category is always written as 0; the per-frame
        # category is stored in the frame table instead.
        self.stackTable.append(GeckoStack(
            prefix_id=prefix_id,
            frame_id=frame_id,
            category_id=0,
        ))
        self.stackMap[key] = stack_id
        return stack_id

    def _intern_string(self, string: str) -> int:
        """Gets a matching string, or saves the new string. Returns a String ID."""
        string_id = self.stringMap.get(string)
        if string_id is not None:
            return string_id
        string_id = len(self.stringTable)
        self.stringTable.append(string)
        self.stringMap[string] = string_id
        return string_id

    def _intern_frame(self, frame: StackFrame) -> int:
        """Gets a matching stack frame, or saves the new frame. Returns a Frame ID.

        Frames are keyed by their display string, so two frames with the same
        symbol and DSO names share one frame table row.
        """
        frame_str = frame.to_gecko_frame_string()
        frame_id = self.frameMap.get(frame_str)
        if frame_id is not None:
            return frame_id
        frame_id = len(self.frameTable)
        self.frameMap[frame_str] = frame_id
        string_id = self._intern_string(frame_str)

        self.frameTable.append(GeckoFrame(
            string_id=string_id,
            relevantForJS=False,
            innerWindowID=0,
            implementation=None,
            optimizations=None,
            line=None,
            column=None,
            category=frame.category().value,
            subcategory=0,
        ))
        return frame_id

    def add_sample(self, comm: str, stack: List[StackFrame], time_ms: Milliseconds) -> None:
        """Add a timestamped stack trace sample to the thread builder.

        Args:
          comm: command-line (name) of the thread at this sample
          stack: sampled stack frames. Root first, leaf last.
          time_ms: timestamp of sample in milliseconds
        """
        # Unix threads often don't set their name immediately upon creation.
        # Use the last name
        self.comm = comm

        # Walk from root to leaf, interning one stack row per frame; the row
        # created for the leaf identifies the whole stack. An empty stack
        # leaves the sample's stack ID as None.
        prefix_stack_id = None
        for frame in stack:
            frame_id = self._intern_frame(frame)
            prefix_stack_id = self._intern_stack(frame_id, prefix_stack_id)

        self.samples.append(GeckoSample(
            stack_id=prefix_stack_id,
            time_ms=time_ms,
            responsiveness=0,
            complete_stack=is_complete_stack(stack),
        ))

    def sort_samples(self) -> None:
        """ The samples aren't guaranteed to be in order. Sort them by time. """
        self.samples.sort(key=lambda s: s.time_ms)

    def remove_stack_gaps(self, max_remove_gap_length: int, gap_distr: Dict[int, int]) -> None:
        """ Ideally all callstacks are complete. But some may be broken for different reasons.
            To create a smooth view in "Stack Chart", remove small gaps of broken callstacks.

        A gap is a run of consecutive samples (in the current sample order)
        whose callstack is not complete.

        Args:
            max_remove_gap_length: the max length of continuous broken-stack samples to remove.
                0 disables gap removal entirely.
            gap_distr: updated in place with gap length -> number of gaps of that length.
                Every gap found is counted, whether or not it is removed. Any mutable
                mapping works (plain dict, Counter, defaultdict).
        """
        if max_remove_gap_length == 0:
            return
        samples = self.samples
        sample_count = len(samples)
        remove_flags = [False] * sample_count
        i = 0
        while i < sample_count:
            if samples[i].complete_stack:
                i += 1
                continue
            # samples[i] starts a gap; measure how far it extends.
            n = 1
            while i + n < sample_count and not samples[i + n].complete_stack:
                n += 1
            # Use get() rather than += so a plain dict (as the annotation
            # allows) does not raise KeyError on the first gap of a length.
            gap_distr[n] = gap_distr.get(n, 0) + 1
            if n <= max_remove_gap_length:
                remove_flags[i:i + n] = [True] * n
            i += n
        if any(remove_flags):
            self.samples = [s for s, remove in zip(samples, remove_flags) if not remove]

    def to_json_dict(self) -> Dict:
        """Converts this Thread to GeckoThread JSON format."""

        # Gecko profile format is row-oriented data as List[List],
        # And a schema for interpreting each index.
        # Schema:
        # https://github.com/firefox-devtools/profiler/blob/main/docs-developer/gecko-profile-format.md
        # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L230
        return {
            "tid": self.tid,
            "pid": self.pid,
            "name": self.comm,
            # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L51
            "markers": {
                "schema": {
                    "name": 0,
                    "startTime": 1,
                    "endTime": 2,
                    "phase": 3,
                    "category": 4,
                    "data": 5,
                },
                "data": [],
            },
            # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L90
            "samples": {
                "schema": {
                    "stack": 0,
                    "time": 1,
                    "responsiveness": 2,
                },
                "data": [s.to_json() for s in self.samples],
            },
            # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L156
            "frameTable": {
                "schema": {
                    "location": 0,
                    "relevantForJS": 1,
                    "innerWindowID": 2,
                    "implementation": 3,
                    "optimizations": 4,
                    "line": 5,
                    "column": 6,
                    "category": 7,
                    "subcategory": 8,
                },
                "data": self.frameTable,
            },
            # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L216
            "stackTable": {
                "schema": {
                    "prefix": 0,
                    "frame": 1,
                    "category": 2,
                },
                "data": self.stackTable,
            },
            "stringTable": self.stringTable,
            "registerTime": 0,
            "unregisterTime": None,
            "processType": "default",
        }
367
368
def remove_stack_gaps(max_remove_gap_length: int, thread_map: Dict[int, Thread]) -> None:
    """Strip short broken-stack gaps from every thread and log a summary.

    Threads left without any sample are deleted from thread_map. Nothing is
    done when max_remove_gap_length is 0.
    """
    if max_remove_gap_length == 0:
        return

    gap_distr = Counter()
    total_sample_count = 0
    remove_sample_count = 0
    # Iterate over a snapshot, because emptied threads are deleted on the fly.
    for tid, thread in list(thread_map.items()):
        samples_before = len(thread.samples)
        thread.remove_stack_gaps(max_remove_gap_length, gap_distr)
        samples_after = len(thread.samples)
        total_sample_count += samples_before
        remove_sample_count += samples_before - samples_after
        if samples_after == 0:
            del thread_map[tid]

    if total_sample_count == 0:
        # No samples at all: nothing to report (and no percentage to compute).
        return
    removed_percent = remove_sample_count / total_sample_count * 100
    logging.info('Remove stack gaps with length <= %d. %d (%.2f%%) samples are removed.',
                 max_remove_gap_length, remove_sample_count, removed_percent)
    logging.debug('Stack gap length distribution among samples (gap_length: count): %s',
                  gap_distr)
392
393
def _gecko_profile(
        record_file: str,
        symfs_dir: Optional[str],
        kallsyms_file: Optional[str],
        report_lib_options: ReportLibOptions,
        max_remove_gap_length: int,
        percpu_samples: bool) -> GeckoProfile:
    """Convert a simpleperf profile to Gecko Profile Format.

    Args:
      record_file: path of the recording to read.
      symfs_dir: directory to find binaries with symbols, or None to leave
        the report library's setting untouched.
      kallsyms_file: path of the kernel symbol file, or None to leave the
        report library's setting untouched.
      report_lib_options: options forwarded to the report library. When
        percpu_samples is set and the recording reports supported trace
        off-CPU modes, its trace_offcpu attribute is overwritten in place
        with 'on-cpu'.
      max_remove_gap_length: max length of a run of broken-stack samples to
        drop from each thread; 0 keeps every sample.
      percpu_samples: group samples by cpu instead of by thread. Each cpu
        becomes one "thread" in the output, and every stack gets a synthetic
        root frame naming the real thread and process.

    Returns:
      The profile as a JSON-serializable dict.
    """
    lib = GetReportLib(record_file)

    # Map from tid (or from cpu number, when percpu_samples is set) to Thread
    thread_map: Dict[int, Thread] = {}
    # Map from pid to process name
    process_names: Dict[int, str] = {}

    # Everything that talks to the report library lives inside the try block
    # so that the library is closed even when reading the recording fails.
    try:
        lib.ShowIpForUnknownSymbol()
        if symfs_dir is not None:
            lib.SetSymfs(symfs_dir)
        if kallsyms_file is not None:
            lib.SetKallsymsFile(kallsyms_file)
        if percpu_samples:
            # Grouping samples by cpus doesn't support off cpu samples.
            if lib.GetSupportedTraceOffCpuModes():
                report_lib_options.trace_offcpu = 'on-cpu'
        lib.SetReportOptions(report_lib_options)

        arch = lib.GetArch()
        meta_info = lib.MetaInfo()
        record_cmd = lib.GetRecordCmd()

        while True:
            sample = lib.GetNextSample()
            if sample is None:
                break
            symbol = lib.GetSymbolOfCurrentSample()
            callchain = lib.GetCallChainOfCurrentSample()
            # Presumably sample.time is in nanoseconds; dividing by 1e6 gives
            # the milliseconds the Gecko format expects.
            sample_time_ms = sample.time / 1000000

            # The sampled symbol is the leaf; the callchain entries follow it.
            stack: List[StackFrame] = [
                StackFrame(symbol.symbol_name, symbol.dso_name),
            ]
            for i in range(callchain.nr):
                entry = callchain.entries[i]
                stack.append(StackFrame(
                    symbol_name=entry.symbol.symbol_name,
                    dso_name=entry.symbol.dso_name,
                ))
            # We want root first, leaf last.
            stack.reverse()

            if percpu_samples:
                # The process name is learned from samples of the main thread
                # (tid == pid); until one is seen the lookup yields None.
                if sample.tid == sample.pid:
                    process_names[sample.pid] = sample.thread_comm
                process_name = process_names.get(sample.pid)
                stack = [
                    # This is a synthetic stack frame, these aren't real symbols or
                    # DSOs, but they show up nicely in the UI.
                    StackFrame(
                        symbol_name='%s tid %d' % (sample.thread_comm, sample.tid),
                        dso_name='%s pid %d' % (process_name, sample.pid),
                    )
                ] + stack
                cpu_name = f'Cpu {sample.cpu}'
                thread = thread_map.get(sample.cpu)
                if thread is None:
                    thread = Thread(comm=cpu_name, pid=sample.cpu, tid=sample.cpu)
                    thread_map[sample.cpu] = thread
                thread.add_sample(
                    comm=cpu_name,
                    stack=stack,
                    time_ms=sample_time_ms)
            else:
                # add thread sample
                thread = thread_map.get(sample.tid)
                if thread is None:
                    thread = Thread(comm=sample.thread_comm, pid=sample.pid, tid=sample.tid)
                    thread_map[sample.tid] = thread
                thread.add_sample(
                    comm=sample.thread_comm,
                    stack=stack,
                    # We are being a bit fast and loose with time here.  simpleperf
                    # uses CLOCK_MONOTONIC by default, which doesn't use the normal unix
                    # epoch, but rather some arbitrary time. In practice, this doesn't
                    # matter, the Firefox Profiler normalises all the timestamps to begin at
                    # the minimum time.  Consider fixing this in future, if needed, by
                    # setting `simpleperf record --clockid realtime`.
                    time_ms=sample_time_ms)
    finally:
        lib.Close()

    for thread in thread_map.values():
        thread.sort_samples()

    remove_stack_gaps(max_remove_gap_length, thread_map)

    threads = [thread.to_json_dict() for thread in thread_map.values()]

    profile_timestamp = meta_info.get('timestamp')
    end_time_ms = (int(profile_timestamp) * 1000) if profile_timestamp else 0

    # Schema: https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L305
    gecko_profile_meta = {
        "interval": 1,
        "processType": 0,
        "product": record_cmd,
        "device": meta_info.get("product_props"),
        "platform": meta_info.get("android_build_fingerprint"),
        "stackwalk": 1,
        "debug": 0,
        "gcpoison": 0,
        "asyncstack": 1,
        # The profile timestamp is actually the end time, not the start time.
        # This is close enough for our purposes; I mostly just want to know which
        # day the profile was taken! Consider fixing this in future, if needed,
        # by setting `simpleperf record --clockid realtime` and taking the minimum
        # sample time.
        "startTime": end_time_ms,
        "shutdownTime": None,
        "version": 24,
        "presymbolicated": True,
        "categories": Category.to_json(),
        "markerSchema": [],
        "abi": arch,
        "oscpu": meta_info.get("android_build_fingerprint"),
        "appBuildID": meta_info.get("app_versioncode"),
    }

    # Schema:
    # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L377
    # https://github.com/firefox-devtools/profiler/blob/main/docs-developer/gecko-profile-format.md
    return {
        "meta": gecko_profile_meta,
        "libs": [],
        "threads": threads,
        "processes": [],
        "pausedRanges": [],
    }
529
530
def main() -> None:
    """Parse the command line, convert the recording, and write JSON to stdout."""
    remove_gaps_help = """
                        Ideally all callstacks are complete. But some may be broken for different
                        reasons. To create a smooth view in "Stack Chart", remove small gaps of
                        broken callstacks. MAX_GAP_LENGTH is the max length of continuous
                        broken-stack samples we want to remove.
                        """

    parser = BaseArgumentParser(description=__doc__)
    parser.add_argument(
        '--symfs',
        help='Set the path to find binaries with symbols and debug info.')
    parser.add_argument(
        '--kallsyms',
        help='Set the path to find kernel symbols.')
    parser.add_argument(
        '-i', '--record_file', nargs='?', default='perf.data',
        help='Default is perf.data.')
    parser.add_argument(
        '--remove-gaps', metavar='MAX_GAP_LENGTH', dest='max_remove_gap_length',
        type=int, default=3, help=remove_gaps_help)
    parser.add_argument(
        '--percpu-samples', action='store_true',
        help='show samples based on cpus instead of threads')
    parser.add_report_lib_options()
    options = parser.parse_args()

    gecko_profile = _gecko_profile(
        record_file=options.record_file,
        symfs_dir=options.symfs,
        kallsyms_file=options.kallsyms,
        report_lib_options=options.report_lib_options,
        max_remove_gap_length=options.max_remove_gap_length,
        percpu_samples=options.percpu_samples,
    )

    # Keys are sorted so the emitted JSON is stable for a given profile.
    json.dump(gecko_profile, sys.stdout, sort_keys=True)
561
562
# Run the converter only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
565