xref: /aosp_15_r20/external/bcc/tools/offcputime.py (revision 387f9dfdfa2baef462e92476d413c7bc2470293e)
1#!/usr/bin/env python
2#
3# offcputime    Summarize off-CPU time by stack trace
4#               For Linux, uses BCC, eBPF.
5#
6# USAGE: offcputime [-h] [-p PID | -t TID | -u | -k] [-U | -K] [-d] [-f] [-s]
7#                   [--stack-storage-size STACK_STORAGE_SIZE]
8#                   [-m MIN_BLOCK_TIME] [-M MAX_BLOCK_TIME] [--state STATE]
9#                   [duration]
10#
11# Copyright 2016 Netflix, Inc.
12# Licensed under the Apache License, Version 2.0 (the "License")
13#
14# 13-Jan-2016	Brendan Gregg	Created this.
15# 27-Mar-2023	Rocky Xing      Added option to show symbol offsets.
16# 04-Apr-2023   Rocky Xing      Updated default stack storage size.
17
18from __future__ import print_function
19from bcc import BPF
20from sys import stderr
21import argparse
22import errno
23import signal
24
25# arg validation
26def positive_int(val):
27    try:
28        ival = int(val)
29    except ValueError:
30        raise argparse.ArgumentTypeError("must be an integer")
31
32    if ival < 0:
33        raise argparse.ArgumentTypeError("must be positive")
34    return ival
35
36def positive_nonzero_int(val):
37    ival = positive_int(val)
38    if ival == 0:
39        raise argparse.ArgumentTypeError("must be nonzero")
40    return ival
41
42def stack_id_err(stack_id):
43    # -EFAULT in get_stackid normally means the stack-trace is not available,
44    # Such as getting kernel stack trace in userspace code
45    return (stack_id < 0) and (stack_id != -errno.EFAULT)
46
47# arguments
48examples = """examples:
49    ./offcputime             # trace off-CPU stack time until Ctrl-C
50    ./offcputime 5           # trace for 5 seconds only
51    ./offcputime -f 5        # 5 seconds, and output in folded format
52    ./offcputime -s 5        # 5 seconds, and show symbol offsets
53    ./offcputime -m 1000     # trace only events that last more than 1000 usec
54    ./offcputime -M 10000    # trace only events that last less than 10000 usec
55    ./offcputime -p 185      # only trace threads for PID 185
56    ./offcputime -t 188      # only trace thread 188
57    ./offcputime -u          # only trace user threads (no kernel)
58    ./offcputime -k          # only trace kernel threads (no user)
59    ./offcputime -U          # only show user space stacks (no kernel)
60    ./offcputime -K          # only show kernel space stacks (no user)
61"""
62parser = argparse.ArgumentParser(
63    description="Summarize off-CPU time by stack trace",
64    formatter_class=argparse.RawDescriptionHelpFormatter,
65    epilog=examples)
66thread_group = parser.add_mutually_exclusive_group()
67# Note: this script provides --pid and --tid flags but their arguments are
68# referred to internally using kernel nomenclature: TGID and PID.
69thread_group.add_argument("-p", "--pid", metavar="PID", dest="tgid",
70    help="trace this PID only", type=positive_int)
71thread_group.add_argument("-t", "--tid", metavar="TID", dest="pid",
72    help="trace this TID only", type=positive_int)
73thread_group.add_argument("-u", "--user-threads-only", action="store_true",
74    help="user threads only (no kernel threads)")
75thread_group.add_argument("-k", "--kernel-threads-only", action="store_true",
76    help="kernel threads only (no user threads)")
77stack_group = parser.add_mutually_exclusive_group()
78stack_group.add_argument("-U", "--user-stacks-only", action="store_true",
79    help="show stacks from user space only (no kernel space stacks)")
80stack_group.add_argument("-K", "--kernel-stacks-only", action="store_true",
81    help="show stacks from kernel space only (no user space stacks)")
82parser.add_argument("-d", "--delimited", action="store_true",
83    help="insert delimiter between kernel/user stacks")
84parser.add_argument("-f", "--folded", action="store_true",
85    help="output folded format")
86parser.add_argument("-s", "--offset", action="store_true",
87    help="show address offsets")
88parser.add_argument("--stack-storage-size", default=16384,
89    type=positive_nonzero_int,
90    help="the number of unique stack traces that can be stored and "
91         "displayed (default 16384)")
92parser.add_argument("duration", nargs="?", default=99999999,
93    type=positive_nonzero_int,
94    help="duration of trace, in seconds")
95parser.add_argument("-m", "--min-block-time", default=1,
96    type=positive_nonzero_int,
97    help="the amount of time in microseconds over which we " +
98         "store traces (default 1)")
99parser.add_argument("-M", "--max-block-time", default=(1 << 64) - 1,
100    type=positive_nonzero_int,
101    help="the amount of time in microseconds under which we " +
102         "store traces (default U64_MAX)")
103parser.add_argument("--state", type=positive_int,
104    help="filter on this thread state bitmask (eg, 2 == TASK_UNINTERRUPTIBLE" +
105         ") see include/linux/sched.h")
106parser.add_argument("--ebpf", action="store_true",
107    help=argparse.SUPPRESS)
108args = parser.parse_args()
109folded = args.folded
110duration = int(args.duration)
111debug = 0
112
113if args.folded and args.offset:
114    print("ERROR: can only use -f or -s. Exiting.")
115    exit()
116
117# signal handler
118def signal_ignore(signal, frame):
119    print()
120
121# define BPF program
122bpf_text = """
123#include <uapi/linux/ptrace.h>
124#include <linux/sched.h>
125
126#define MINBLOCK_US    MINBLOCK_US_VALUEULL
127#define MAXBLOCK_US    MAXBLOCK_US_VALUEULL
128
129struct key_t {
130    u32 pid;
131    u32 tgid;
132    int user_stack_id;
133    int kernel_stack_id;
134    char name[TASK_COMM_LEN];
135};
136BPF_HASH(counts, struct key_t);
137BPF_HASH(start, u32);
138BPF_STACK_TRACE(stack_traces, STACK_STORAGE_SIZE);
139
140struct warn_event_t {
141    u32 pid;
142    u32 tgid;
143    u32 t_start;
144    u32 t_end;
145};
146BPF_PERF_OUTPUT(warn_events);
147
148int oncpu(struct pt_regs *ctx, struct task_struct *prev) {
149    u32 pid = prev->pid;
150    u32 tgid = prev->tgid;
151    u64 ts, *tsp;
152
153    // record previous thread sleep time
154    if ((THREAD_FILTER) && (STATE_FILTER)) {
155        ts = bpf_ktime_get_ns();
156        start.update(&pid, &ts);
157    }
158
159    // get the current thread's start time
160    pid = bpf_get_current_pid_tgid();
161    tgid = bpf_get_current_pid_tgid() >> 32;
162    tsp = start.lookup(&pid);
163    if (tsp == 0) {
164        return 0;        // missed start or filtered
165    }
166
167    // calculate current thread's delta time
168    u64 t_start = *tsp;
169    u64 t_end = bpf_ktime_get_ns();
170    start.delete(&pid);
171    if (t_start > t_end) {
172        struct warn_event_t event = {
173            .pid = pid,
174            .tgid = tgid,
175            .t_start = t_start,
176            .t_end = t_end,
177        };
178        warn_events.perf_submit(ctx, &event, sizeof(event));
179        return 0;
180    }
181    u64 delta = t_end - t_start;
182    delta = delta / 1000;
183    if ((delta < MINBLOCK_US) || (delta > MAXBLOCK_US)) {
184        return 0;
185    }
186
187    // create map key
188    struct key_t key = {};
189
190    key.pid = pid;
191    key.tgid = tgid;
192    key.user_stack_id = USER_STACK_GET;
193    key.kernel_stack_id = KERNEL_STACK_GET;
194    bpf_get_current_comm(&key.name, sizeof(key.name));
195
196    counts.increment(key, delta);
197    return 0;
198}
199"""
200
201# set thread filter
202thread_context = ""
203if args.tgid is not None:
204    thread_context = "PID %d" % args.tgid
205    thread_filter = 'tgid == %d' % args.tgid
206elif args.pid is not None:
207    thread_context = "TID %d" % args.pid
208    thread_filter = 'pid == %d' % args.pid
209elif args.user_threads_only:
210    thread_context = "user threads"
211    thread_filter = '!(prev->flags & PF_KTHREAD)'
212elif args.kernel_threads_only:
213    thread_context = "kernel threads"
214    thread_filter = 'prev->flags & PF_KTHREAD'
215else:
216    thread_context = "all threads"
217    thread_filter = '1'
218if args.state == 0:
219    state_filter = 'prev->STATE_FIELD == 0'
220elif args.state:
221    # these states are sometimes bitmask checked
222    state_filter = 'prev->STATE_FIELD & %d' % args.state
223else:
224    state_filter = '1'
225bpf_text = bpf_text.replace('THREAD_FILTER', thread_filter)
226bpf_text = bpf_text.replace('STATE_FILTER', state_filter)
227if BPF.kernel_struct_has_field(b'task_struct', b'__state') == 1:
228    bpf_text = bpf_text.replace('STATE_FIELD', '__state')
229else:
230    bpf_text = bpf_text.replace('STATE_FIELD', 'state')
231
232# set stack storage size
233bpf_text = bpf_text.replace('STACK_STORAGE_SIZE', str(args.stack_storage_size))
234bpf_text = bpf_text.replace('MINBLOCK_US_VALUE', str(args.min_block_time))
235bpf_text = bpf_text.replace('MAXBLOCK_US_VALUE', str(args.max_block_time))
236
237# handle stack args
238kernel_stack_get = "stack_traces.get_stackid(ctx, 0)"
239user_stack_get = "stack_traces.get_stackid(ctx, BPF_F_USER_STACK)"
240stack_context = ""
241if args.user_stacks_only:
242    stack_context = "user"
243    kernel_stack_get = "-1"
244elif args.kernel_stacks_only:
245    stack_context = "kernel"
246    user_stack_get = "-1"
247else:
248    stack_context = "user + kernel"
249bpf_text = bpf_text.replace('USER_STACK_GET', user_stack_get)
250bpf_text = bpf_text.replace('KERNEL_STACK_GET', kernel_stack_get)
251
252need_delimiter = args.delimited and not (args.kernel_stacks_only or
253                                         args.user_stacks_only)
254
255# check for an edge case; the code below will handle this case correctly
256# but ultimately nothing will be displayed
257if args.kernel_threads_only and args.user_stacks_only:
258    print("ERROR: Displaying user stacks for kernel threads " +
259          "doesn't make sense.", file=stderr)
260    exit(2)
261
262if debug or args.ebpf:
263    print(bpf_text)
264    if args.ebpf:
265        print("ERROR: Exiting")
266        exit(3)
267
268# initialize BPF
269b = BPF(text=bpf_text)
270b.attach_kprobe(event_re="^finish_task_switch$|^finish_task_switch\.isra\.\d$",
271                fn_name="oncpu")
272matched = b.num_open_kprobes()
273if matched == 0:
274    print("error: 0 functions traced. Exiting.", file=stderr)
275    exit(4)
276
277# header
278if not folded:
279    print("Tracing off-CPU time (us) of %s by %s stack" %
280        (thread_context, stack_context), end="")
281    if duration < 99999999:
282        print(" for %d secs." % duration)
283    else:
284        print("... Hit Ctrl-C to end.")
285
286
287def print_warn_event(cpu, data, size):
288    event = b["warn_events"].event(data)
289    # See https://github.com/iovisor/bcc/pull/3227 for those wondering how can this happen.
290    print("WARN: Skipped an event with negative duration: pid:%d, tgid:%d, off-cpu:%d, on-cpu:%d"
291          % (event.pid, event.tgid, event.t_start, event.t_end),
292          file=stderr)
293
294b["warn_events"].open_perf_buffer(print_warn_event)
295try:
296    duration_ms = duration * 1000
297    start_time_ms = int(BPF.monotonic_time() / 1000000)
298    while True:
299        elapsed_ms = int(BPF.monotonic_time() / 1000000) - start_time_ms
300        if elapsed_ms >= duration_ms:
301            break
302        b.perf_buffer_poll(timeout=duration_ms - elapsed_ms)
303except KeyboardInterrupt:
304    # as cleanup can take many seconds, trap Ctrl-C:
305    signal.signal(signal.SIGINT, signal_ignore)
306
307if not folded:
308    print()
309
310show_offset = False
311if args.offset:
312    show_offset = True
313
314missing_stacks = 0
315has_enomem = False
316counts = b.get_table("counts")
317stack_traces = b.get_table("stack_traces")
318for k, v in sorted(counts.items(), key=lambda counts: counts[1].value):
319    # handle get_stackid errors
320    if not args.user_stacks_only and stack_id_err(k.kernel_stack_id):
321        missing_stacks += 1
322        has_enomem = has_enomem or k.kernel_stack_id == -errno.ENOMEM
323    if not args.kernel_stacks_only and stack_id_err(k.user_stack_id):
324        missing_stacks += 1
325        has_enomem = has_enomem or k.user_stack_id == -errno.ENOMEM
326
327    # user stacks will be symbolized by tgid, not pid, to avoid the overhead
328    # of one symbol resolver per thread
329    user_stack = [] if k.user_stack_id < 0 else \
330        stack_traces.walk(k.user_stack_id)
331    kernel_stack = [] if k.kernel_stack_id < 0 else \
332        stack_traces.walk(k.kernel_stack_id)
333
334    if folded:
335        # print folded stack output
336        user_stack = list(user_stack)
337        kernel_stack = list(kernel_stack)
338        line = [k.name.decode('utf-8', 'replace')]
339        # if we failed to get the stack is, such as due to no space (-ENOMEM) or
340        # hash collision (-EEXIST), we still print a placeholder for consistency
341        if not args.kernel_stacks_only:
342            if stack_id_err(k.user_stack_id):
343                line.append("[Missed User Stack]")
344            else:
345                line.extend([b.sym(addr, k.tgid).decode('utf-8', 'replace')
346                    for addr in reversed(user_stack)])
347        if not args.user_stacks_only:
348            line.extend(["-"] if (need_delimiter and k.kernel_stack_id >= 0 and k.user_stack_id >= 0) else [])
349            if stack_id_err(k.kernel_stack_id):
350                line.append("[Missed Kernel Stack]")
351            else:
352                line.extend([b.ksym(addr).decode('utf-8', 'replace')
353                    for addr in reversed(kernel_stack)])
354        print("%s %d" % (";".join(line), v.value))
355    else:
356        # print default multi-line stack output
357        if not args.user_stacks_only:
358            if stack_id_err(k.kernel_stack_id):
359                print("    [Missed Kernel Stack]")
360            else:
361                for addr in kernel_stack:
362                    print("    %s" % b.ksym(addr, show_offset=show_offset).decode('utf-8', 'replace'))
363        if not args.kernel_stacks_only:
364            if need_delimiter and k.user_stack_id >= 0 and k.kernel_stack_id >= 0:
365                print("    --")
366            if stack_id_err(k.user_stack_id):
367                print("    [Missed User Stack]")
368            else:
369                for addr in user_stack:
370                    print("    %s" % b.sym(addr, k.tgid, show_offset=show_offset).decode('utf-8', 'replace'))
371        print("    %-16s %s (%d)" % ("-", k.name.decode('utf-8', 'replace'), k.pid))
372        print("        %d\n" % v.value)
373
374if missing_stacks > 0:
375    enomem_str = "" if not has_enomem else \
376        " Consider increasing --stack-storage-size."
377    print("WARNING: %d stack traces lost and could not be displayed.%s" %
378        (missing_stacks, enomem_str),
379        file=stderr)
380