xref: /aosp_15_r20/external/bcc/tools/execsnoop.py (revision 387f9dfdfa2baef462e92476d413c7bc2470293e)
1#!/usr/bin/env python
2# @lint-avoid-python-3-compatibility-imports
3#
4# execsnoop Trace new processes via exec() syscalls.
5#           For Linux, uses BCC, eBPF. Embedded C.
6#
7# USAGE: execsnoop [-h] [-T] [-t] [-x] [--cgroupmap CGROUPMAP]
8#                  [--mntnsmap MNTNSMAP] [-u USER] [-q] [-n NAME] [-l LINE]
9#                  [-U] [--max-args MAX_ARGS] [-P PPID]
10#
11# This currently will print up to a maximum of 19 arguments, plus the process
12# name, so 20 fields in total (MAXARG).
13#
14# This won't catch all new processes: an application may fork() but not exec().
15#
16# Copyright 2016 Netflix, Inc.
17# Licensed under the Apache License, Version 2.0 (the "License")
18#
19# 07-Feb-2016   Brendan Gregg   Created this.
20# 11-Aug-2022   Rocky Xing      Added PPID filter support.
21
22from __future__ import print_function
23from bcc import BPF
24from bcc.containers import filter_by_containers
25from bcc.utils import ArgString, printb
26import bcc.utils as utils
27import argparse
28import re
29import time
30import pwd
31from collections import defaultdict
32from time import strftime
33
34
def parse_uid(user):
    """Convert the value given to ``-u`` into a numeric UID.

    user is either a decimal UID or a login name.  Anything that parses as
    an integer is treated as a UID; everything else is looked up in the
    password database.

    Returns the UID as an int.

    Raises argparse.ArgumentTypeError when the value is a negative number or
    is neither a number nor a known user name.
    """
    try:
        uid = int(user)
    except ValueError:
        uid = None

    if uid is None:
        # Not numeric: resolve it as a user name.
        try:
            return pwd.getpwnam(user).pw_uid
        except KeyError:
            raise argparse.ArgumentTypeError(
                "{0!r} is not valid UID or user entry".format(user))

    # The value is pasted into the BPF program and compared against an
    # unsigned 32-bit uid, so a negative number could never match a real
    # user and would silently trace nothing.
    if uid < 0:
        raise argparse.ArgumentTypeError(
            "{0!r} is not valid UID: must not be negative".format(user))
    return uid
49
50
# arguments
examples = """examples:
    ./execsnoop                      # trace all exec() syscalls
    ./execsnoop -x                   # include failed exec()s
    ./execsnoop -T                   # include time (HH:MM:SS)
    ./execsnoop -P 181               # only trace new processes whose parent PID is 181
    ./execsnoop -U                   # include UID
    ./execsnoop -u 1000              # only trace UID 1000
    ./execsnoop -u user              # get user UID and trace only them
    ./execsnoop -t                   # include timestamps
    ./execsnoop -q                   # add "quotemarks" around arguments
    ./execsnoop -n main              # only print command lines containing "main"
    ./execsnoop -l tpkg              # only print command where arguments contains "tpkg"
    ./execsnoop --cgroupmap mappath  # only trace cgroups in this BPF map
    ./execsnoop --mntnsmap mappath   # only trace mount namespaces in the map
"""


def _non_negative_int_text(text):
    """Validate that text is a non-negative decimal integer.

    Used as an argparse ``type`` for options whose value is pasted verbatim
    into the BPF C source.  The result is returned as text (not int) because
    it is later fed to str.replace / %-formatting, and it is re-rendered in
    canonical decimal so that a leading zero is not read as octal by the C
    compiler.

    Raises argparse.ArgumentTypeError for non-numeric or negative input.
    """
    try:
        number = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(
            "{0!r} is not a valid integer".format(text))
    if number < 0:
        raise argparse.ArgumentTypeError(
            "{0!r} must not be negative".format(text))
    return str(number)


parser = argparse.ArgumentParser(
    description="Trace exec() syscalls",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-T", "--time", action="store_true",
    help="include time column on output (HH:MM:SS)")
parser.add_argument("-t", "--timestamp", action="store_true",
    help="include timestamp on output")
parser.add_argument("-x", "--fails", action="store_true",
    help="include failed exec()s")
parser.add_argument("--cgroupmap",
    help="trace cgroups in this BPF map only")
parser.add_argument("--mntnsmap",
    help="trace mount namespaces in this BPF map only")
parser.add_argument("-u", "--uid", type=parse_uid, metavar='USER',
    help="trace this UID only")
parser.add_argument("-q", "--quote", action="store_true",
    help="Add quotemarks (\") around arguments.")
parser.add_argument("-n", "--name",
    type=ArgString,
    help="only print commands matching this name (regex), any arg")
parser.add_argument("-l", "--line",
    type=ArgString,
    help="only print commands where arg contains this line (regex)")
parser.add_argument("-U", "--print-uid", action="store_true",
    help="print UID column")
# --max-args and --ppid are substituted into the C program text, so they are
# checked here instead of surfacing later as a BPF compile error.
parser.add_argument("--max-args", default="20",
    type=_non_negative_int_text,
    help="maximum number of arguments parsed and displayed, defaults to 20")
parser.add_argument("-P", "--ppid",
    type=_non_negative_int_text,
    help="trace this parent PID only")
# hidden option: dump the generated BPF program and exit
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)
args = parser.parse_args()
101
# define BPF program
#
# The C text below is a template: MAXARG, UID_FILTER and PPID_FILTER are
# placeholders that are substituted further down, before compilation.
# syscall__execve submits one EVENT_ARG record per argument string and
# do_ret_sys_execve submits a single EVENT_RET record with the return value.
# NOTE(review): the "..." truncation marker is copied from a kernel-stack
# buffer with bpf_probe_read_user -- confirm that this still yields "..." on
# kernels that refuse kernel addresses in user-memory reads.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>
#include <linux/fs.h>

#define ARGSIZE  128

enum event_type {
    EVENT_ARG,
    EVENT_RET,
};

struct data_t {
    u32 pid;  // PID as in the userspace term (i.e. task->tgid in kernel)
    u32 ppid; // Parent PID as in the userspace term (i.e task->real_parent->tgid in kernel)
    u32 uid;
    char comm[TASK_COMM_LEN];
    enum event_type type;
    char argv[ARGSIZE];
    int retval;
};

BPF_PERF_OUTPUT(events);

static int __submit_arg(struct pt_regs *ctx, void *ptr, struct data_t *data)
{
    bpf_probe_read_user(data->argv, sizeof(data->argv), ptr);
    events.perf_submit(ctx, data, sizeof(struct data_t));
    return 1;
}

static int submit_arg(struct pt_regs *ctx, void *ptr, struct data_t *data)
{
    const char *argp = NULL;
    bpf_probe_read_user(&argp, sizeof(argp), ptr);
    if (argp) {
        return __submit_arg(ctx, (void *)(argp), data);
    }
    return 0;
}

int syscall__execve(struct pt_regs *ctx,
    const char __user *filename,
    const char __user *const __user *__argv,
    const char __user *const __user *__envp)
{

    u32 uid = bpf_get_current_uid_gid() & 0xffffffff;

    UID_FILTER

    if (container_should_be_filtered()) {
        return 0;
    }

    // create data here and pass to submit_arg to save stack space (#555)
    struct data_t data = {};
    struct task_struct *task;

    data.pid = bpf_get_current_pid_tgid() >> 32;

    task = (struct task_struct *)bpf_get_current_task();
    // Some kernels, like Ubuntu 4.13.0-generic, return 0
    // as the real_parent->tgid.
    // We use the get_ppid function as a fallback in those cases. (#1883)
    data.ppid = task->real_parent->tgid;

    PPID_FILTER

    bpf_get_current_comm(&data.comm, sizeof(data.comm));
    data.type = EVENT_ARG;

    __submit_arg(ctx, (void *)filename, &data);

    // skip first arg, as we submitted filename
    #pragma unroll
    for (int i = 1; i < MAXARG; i++) {
        if (submit_arg(ctx, (void *)&__argv[i], &data) == 0)
             goto out;
    }

    // handle truncated argument list
    char ellipsis[] = "...";
    __submit_arg(ctx, (void *)ellipsis, &data);
out:
    return 0;
}

int do_ret_sys_execve(struct pt_regs *ctx)
{
    if (container_should_be_filtered()) {
        return 0;
    }

    struct data_t data = {};
    struct task_struct *task;

    u32 uid = bpf_get_current_uid_gid() & 0xffffffff;
    UID_FILTER

    data.pid = bpf_get_current_pid_tgid() >> 32;
    data.uid = uid;

    task = (struct task_struct *)bpf_get_current_task();
    // Some kernels, like Ubuntu 4.13.0-generic, return 0
    // as the real_parent->tgid.
    // We use the get_ppid function as a fallback in those cases. (#1883)
    data.ppid = task->real_parent->tgid;

    PPID_FILTER

    bpf_get_current_comm(&data.comm, sizeof(data.comm));
    data.type = EVENT_RET;
    data.retval = PT_REGS_RC(ctx);
    events.perf_submit(ctx, &data, sizeof(data));

    return 0;
}
"""

# MAXARG is the upper bound of the unrolled argv loop in syscall__execve.
bpf_text = bpf_text.replace("MAXARG", args.max_args)

# Test against None, not truthiness: UID 0 (root) is a legitimate filter
# value and must not be mistaken for "no -u option given".
if args.uid is not None:
    bpf_text = bpf_text.replace('UID_FILTER',
        'if (uid != %s) { return 0; }' % args.uid)
else:
    bpf_text = bpf_text.replace('UID_FILTER', '')

if args.ppid:
    bpf_text = bpf_text.replace('PPID_FILTER',
        'if (data.ppid != %s) { return 0; }' % args.ppid)
else:
    bpf_text = bpf_text.replace('PPID_FILTER', '')

# Prepend the cgroup / mount-namespace filter code, which provides the
# container_should_be_filtered() called by both probes.
bpf_text = filter_by_containers(args) + bpf_text
if args.ebpf:
    # --ebpf: show the final program text instead of running it
    print(bpf_text)
    exit()
241
# initialize BPF
# Compile the program, then hook both ends of the execve syscall: the entry
# probe streams the arguments, the return probe reports the result.
b = BPF(text=bpf_text)
execve_fnname = b.get_syscall_fnname("execve")
b.attach_kprobe(fn_name="syscall__execve", event=execve_fnname)
b.attach_kretprobe(fn_name="do_ret_sys_execve", event=execve_fnname)
247
# header
# Optional columns come first, in the order print_event emits them; the
# fixed columns always close the line.
_header_cols = []
if args.time:
    _header_cols.append("%-9s" % "TIME")
if args.timestamp:
    _header_cols.append("%-8s" % "TIME(s)")
if args.print_uid:
    _header_cols.append("%-6s" % "UID")
_header_cols.append(
    "%-16s %-7s %-7s %3s %s" % ("PCOMM", "PID", "PPID", "RET", "ARGS"))
print("".join(_header_cols))
256
class EventType(object):
    """Record tags matching ``enum event_type`` in the BPF program text."""
    # Numbered like the C enum: EVENT_ARG is 0, EVENT_RET is 1.
    EVENT_ARG, EVENT_RET = range(2)
260
# Per-PID list of argument strings, filled by EVENT_ARG records and consumed
# when the matching EVENT_RET record arrives.
argv = defaultdict(list)
# Reference point for the relative TIME(s) column.
start_ts = time.time()
263
# Best-effort PPID lookup through /proc, used only as a fallback when
# task->real_parent->tgid came back as 0 (seen on some kernel versions).
# A short-lived process may already be gone by the time we look.
def get_ppid(pid):
    """Return the parent PID listed in /proc/<pid>/status, or 0 if unknown.

    0 is returned when the status file cannot be read or has no "PPid:" line.
    """
    status_path = "/proc/%d/status" % pid
    try:
        with open(status_path) as proc_status:
            fields = next(
                (row.split() for row in proc_status
                 if row.startswith("PPid:")),
                None)
    except IOError:
        # process already exited, or /proc is not readable
        return 0
    if fields:
        return int(fields[1])
    return 0
277
278# process event
def print_event(cpu, data, size):
    """Perf-buffer callback: gather exec arguments and print finished execs.

    An EVENT_ARG record appends one argument string to the per-PID list in
    ``argv``.  An EVENT_RET record ends that exec: the gathered arguments are
    removed from ``argv`` and, unless a command-line filter rejects the
    event, printed as a single output line.

    cpu and size are passed in by the perf buffer and are not used here;
    data is the raw record, decoded through ``b["events"].event``.
    """
    event = b["events"].event(data)

    if event.type == EventType.EVENT_ARG:
        argv[event.pid].append(event.argv)
        return
    if event.type != EventType.EVENT_RET:
        return

    # Claim the gathered arguments right away so the per-PID entry is always
    # released, printed or not.  pop() with a default copes with a missing
    # entry explicitly, replacing a del hidden behind "except Exception".
    cmd_args = argv.pop(event.pid, [])

    # filters: failed execs (unless -x), -n against comm, -l against the
    # unquoted argument line
    if event.retval != 0 and not args.fails:
        return
    if args.name and not re.search(bytes(args.name), event.comm):
        return
    if args.line and not re.search(bytes(args.line), b' '.join(cmd_args)):
        return

    if args.quote:
        # escape embedded quotemarks, then wrap every argument in a pair
        cmd_args = [
            b"\"" + arg.replace(b"\"", b"\\\"") + b"\""
            for arg in cmd_args
        ]

    if args.time:
        printb(b"%-9s" % strftime("%H:%M:%S").encode('ascii'), nl="")
    if args.timestamp:
        printb(b"%-8.3f" % (time.time() - start_ts), nl="")
    if args.print_uid:
        printb(b"%-6d" % event.uid, nl="")

    # The in-kernel PPID can be 0 on some kernels; fall back to /proc and
    # show "?" when that fails as well.
    ppid = event.ppid if event.ppid > 0 else get_ppid(event.pid)
    ppid = b"%d" % ppid if ppid > 0 else b"?"
    # keep each event on one output line
    argv_text = b' '.join(cmd_args).replace(b'\n', b'\\n')
    printb(b"%-16s %-7d %-7s %3d %s" % (event.comm, event.pid,
           ppid, event.retval, argv_text))
315
316
# loop with callback to print_event
b["events"].open_perf_buffer(print_event)
# Poll until interrupted; Ctrl-C ends the tool without a traceback.
try:
    while True:
        b.perf_buffer_poll()
except KeyboardInterrupt:
    exit()
324