xref: /aosp_15_r20/external/bcc/tools/mountsnoop.py (revision 387f9dfdfa2baef462e92476d413c7bc2470293e)
1#!/usr/bin/env python
2#
3# mountsnoop Trace mount() and umount syscalls.
4#            For Linux, uses BCC, eBPF. Embedded C.
5#
# USAGE: mountsnoop [-h] [-P] [--cgroupmap CGROUPMAP] [--mntnsmap MNTNSMAP]
7#
8# Copyright (c) 2016 Facebook, Inc.
9# Licensed under the Apache License, Version 2.0 (the "License")
10#
11# 14-Oct-2016   Omar Sandoval   Created this.
12
13from __future__ import print_function
14import argparse
15import bcc
16from bcc.containers import filter_by_containers
17import ctypes
18import errno
19import functools
20import sys
21
22
# Embedded BPF C program, compiled by bcc.BPF() in main().
#
# It defines four probe handlers: syscall__mount / syscall__umount (attached
# as kprobes on the mount/umount syscall entry) and do_ret_sys_mount /
# do_ret_sys_umount (attached as kretprobes). Because a single event cannot
# hold every argument, each entry handler submits one `enter` record and then
# one record per string argument through the `events` perf buffer; the return
# handler submits the return value. print_event() reassembles these records
# on the Python side, keyed by the thread id stored in `pid`.
#
# NOTE: container_should_be_filtered() is not defined in this text; it is
# expected to come from the code that filter_by_containers() returns, which
# main() prepends before compiling.
bpf_text = r"""
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>

#include <linux/nsproxy.h>
#include <linux/ns_common.h>

/*
 * XXX: struct mnt_namespace is defined in fs/mount.h, which is private to the
 * VFS and not installed in any kernel-devel packages. So, let's duplicate the
 * important part of the definition. There are actually more members in the
 * real struct, but we don't need them, and they're more likely to change.
 *
 * To add support for --selector option, we need to call filter_by_containers().
 * But this function adds code which defines struct mnt_namespace.
 * To avoid having this structure twice, we define MNT_NAMESPACE_DEFINED in
 * filter_by_containers(), then here we check if macro is already defined before
 * adding struct definition.
 */
#ifndef MNT_NAMESPACE_DEFINED
struct mnt_namespace {
    // This field was removed in https://github.com/torvalds/linux/commit/1a7b8969e664d6af328f00fe6eb7aabd61a71d13
    #if LINUX_VERSION_CODE < KERNEL_VERSION(5, 11, 0)
    atomic_t count;
    #endif
    struct ns_common ns;
};
#endif /* !MNT_NAMESPACE_DEFINED */

/*
 * XXX: this could really use first-class string support in BPF. target is a
 * NUL-terminated path up to PATH_MAX in length. source and type are
 * NUL-terminated strings up to PAGE_SIZE in length. data is a weird case: it's
 * almost always a NUL-terminated string, but for some filesystems (e.g., older
 * NFS variants), it's a binary structure with plenty of NUL bytes, so the
 * kernel always copies up to PAGE_SIZE bytes, stopping when it hits a fault.
 *
 * The best we can do with the existing BPF helpers is to copy as much of each
 * argument as we can. Our stack space is limited, and we need to leave some
 * headroom for the rest of the function, so this should be a decent value.
 */
#define MAX_STR_LEN 412

enum event_type {
    EVENT_MOUNT,
    EVENT_MOUNT_SOURCE,
    EVENT_MOUNT_TARGET,
    EVENT_MOUNT_TYPE,
    EVENT_MOUNT_DATA,
    EVENT_MOUNT_RET,
    EVENT_UMOUNT,
    EVENT_UMOUNT_TARGET,
    EVENT_UMOUNT_RET,
};

struct data_t {
    enum event_type type;
    pid_t pid, tgid;
    union {
        /* EVENT_MOUNT, EVENT_UMOUNT */
        struct {
            /* current->nsproxy->mnt_ns->ns.inum */
            unsigned int mnt_ns;
            char comm[TASK_COMM_LEN];
            char pcomm[TASK_COMM_LEN];
            pid_t ppid;
            unsigned long flags;
        } enter;
        /*
         * EVENT_MOUNT_SOURCE, EVENT_MOUNT_TARGET, EVENT_MOUNT_TYPE,
         * EVENT_MOUNT_DATA, EVENT_UMOUNT_TARGET
         */
        char str[MAX_STR_LEN];
        /* EVENT_MOUNT_RET, EVENT_UMOUNT_RET */
        int retval;
    };
};

BPF_PERF_OUTPUT(events);

int syscall__mount(struct pt_regs *ctx, char __user *source,
                      char __user *target, char __user *type,
                      unsigned long flags, char __user *data)
{
    struct data_t event = {};
    struct task_struct *task;
    struct nsproxy *nsproxy;
    struct mnt_namespace *mnt_ns;

    if (container_should_be_filtered()) {
        return 0;
    }

    event.pid = bpf_get_current_pid_tgid() & 0xffffffff;
    event.tgid = bpf_get_current_pid_tgid() >> 32;

    event.type = EVENT_MOUNT;
    bpf_get_current_comm(event.enter.comm, sizeof(event.enter.comm));
    event.enter.flags = flags;
    task = (struct task_struct *)bpf_get_current_task();
    event.enter.ppid = task->real_parent->tgid;
    bpf_probe_read_kernel_str(&event.enter.pcomm, TASK_COMM_LEN, task->real_parent->comm);
    nsproxy = task->nsproxy;
    mnt_ns = nsproxy->mnt_ns;
    event.enter.mnt_ns = mnt_ns->ns.inum;
    events.perf_submit(ctx, &event, sizeof(event));

    event.type = EVENT_MOUNT_SOURCE;
    __builtin_memset(event.str, 0, sizeof(event.str));
    bpf_probe_read_user(event.str, sizeof(event.str), source);
    events.perf_submit(ctx, &event, sizeof(event));

    event.type = EVENT_MOUNT_TARGET;
    __builtin_memset(event.str, 0, sizeof(event.str));
    bpf_probe_read_user(event.str, sizeof(event.str), target);
    events.perf_submit(ctx, &event, sizeof(event));

    event.type = EVENT_MOUNT_TYPE;
    __builtin_memset(event.str, 0, sizeof(event.str));
    bpf_probe_read_user(event.str, sizeof(event.str), type);
    events.perf_submit(ctx, &event, sizeof(event));

    event.type = EVENT_MOUNT_DATA;
    __builtin_memset(event.str, 0, sizeof(event.str));
    bpf_probe_read_user(event.str, sizeof(event.str), data);
    events.perf_submit(ctx, &event, sizeof(event));

    return 0;
}

int do_ret_sys_mount(struct pt_regs *ctx)
{
    struct data_t event = {};

    event.type = EVENT_MOUNT_RET;
    event.pid = bpf_get_current_pid_tgid() & 0xffffffff;
    event.tgid = bpf_get_current_pid_tgid() >> 32;
    event.retval = PT_REGS_RC(ctx);
    events.perf_submit(ctx, &event, sizeof(event));

    return 0;
}

int syscall__umount(struct pt_regs *ctx, char __user *target, int flags)
{
    struct data_t event = {};
    struct task_struct *task;
    struct nsproxy *nsproxy;
    struct mnt_namespace *mnt_ns;

    if (container_should_be_filtered()) {
        return 0;
    }

    event.pid = bpf_get_current_pid_tgid() & 0xffffffff;
    event.tgid = bpf_get_current_pid_tgid() >> 32;

    event.type = EVENT_UMOUNT;
    bpf_get_current_comm(event.enter.comm, sizeof(event.enter.comm));
    event.enter.flags = flags;
    task = (struct task_struct *)bpf_get_current_task();
    event.enter.ppid = task->real_parent->tgid;
    bpf_probe_read_kernel_str(&event.enter.pcomm, TASK_COMM_LEN, task->real_parent->comm);
    nsproxy = task->nsproxy;
    mnt_ns = nsproxy->mnt_ns;
    event.enter.mnt_ns = mnt_ns->ns.inum;
    events.perf_submit(ctx, &event, sizeof(event));

    event.type = EVENT_UMOUNT_TARGET;
    __builtin_memset(event.str, 0, sizeof(event.str));
    bpf_probe_read_user(event.str, sizeof(event.str), target);
    events.perf_submit(ctx, &event, sizeof(event));

    return 0;
}

int do_ret_sys_umount(struct pt_regs *ctx)
{
    struct data_t event = {};

    event.type = EVENT_UMOUNT_RET;
    event.pid = bpf_get_current_pid_tgid() & 0xffffffff;
    event.tgid = bpf_get_current_pid_tgid() >> 32;
    event.retval = PT_REGS_RC(ctx);
    events.perf_submit(ctx, &event, sizeof(event));

    return 0;
}
"""
212
# Flag tables used to render the numeric `flags` argument symbolically.
# Values follow sys/mount.h (Linux uapi linux/mount.h).

# Magic number that old callers put in the top 16 bits of mount() flags;
# decode_mount_flags() recognises and strips it before decoding the rest.
MS_MGC_VAL = 0xc0ed0000
MS_MGC_MSK = 0xffff0000

# (name, bit) pairs for mount(2) flags, in ascending bit order.
MOUNT_FLAGS = [
    ('MS_RDONLY', 1),
    ('MS_NOSUID', 2),
    ('MS_NODEV', 4),
    ('MS_NOEXEC', 8),
    ('MS_SYNCHRONOUS', 16),
    ('MS_REMOUNT', 32),
    ('MS_MANDLOCK', 64),
    ('MS_DIRSYNC', 128),
    # Previously missing, so it was printed as a raw 0x100.
    ('MS_NOSYMFOLLOW', 256),
    ('MS_NOATIME', 1024),
    ('MS_NODIRATIME', 2048),
    ('MS_BIND', 4096),
    ('MS_MOVE', 8192),
    ('MS_REC', 16384),
    ('MS_SILENT', 32768),
    ('MS_POSIXACL', 1 << 16),
    ('MS_UNBINDABLE', 1 << 17),
    ('MS_PRIVATE', 1 << 18),
    ('MS_SLAVE', 1 << 19),
    ('MS_SHARED', 1 << 20),
    ('MS_RELATIME', 1 << 21),
    ('MS_KERNMOUNT', 1 << 22),
    ('MS_I_VERSION', 1 << 23),
    ('MS_STRICTATIME', 1 << 24),
    ('MS_LAZYTIME', 1 << 25),
    ('MS_ACTIVE', 1 << 30),
    ('MS_NOUSER', 1 << 31),
]

# (name, bit) pairs for umount2(2) flags.
UMOUNT_FLAGS = [
    ('MNT_FORCE', 1),
    ('MNT_DETACH', 2),
    ('MNT_EXPIRE', 4),
    ('UMOUNT_NOFOLLOW', 8),
]
250
251
# Size of the comm/pcomm byte arrays in EnterData; the C program sizes the
# same buffers with TASK_COMM_LEN.
TASK_COMM_LEN = 16  # linux/sched.h
# Size of the string payload in DataUnion; must stay equal to the
# `#define MAX_STR_LEN` inside bpf_text or the ctypes layout will not match.
MAX_STR_LEN = 412
254
255
class EventType(object):
    """Integer tags identifying each record kind sent by the BPF program.

    The names are listed in the same order as `enum event_type` in
    bpf_text, so numbering them from zero reproduces the C values.
    """
    (EVENT_MOUNT,
     EVENT_MOUNT_SOURCE,
     EVENT_MOUNT_TARGET,
     EVENT_MOUNT_TYPE,
     EVENT_MOUNT_DATA,
     EVENT_MOUNT_RET,
     EVENT_UMOUNT,
     EVENT_UMOUNT_TARGET,
     EVENT_UMOUNT_RET) = range(9)
266
267
class EnterData(ctypes.Structure):
    """ctypes view of the `enter` member of `struct data_t` in bpf_text.

    Carried by EVENT_MOUNT and EVENT_UMOUNT records. Field order and types
    must follow the C declaration so the offsets line up.
    """
    _fields_ = [
        # Mount namespace inode number (mnt_ns->ns.inum on the C side).
        ('mnt_ns', ctypes.c_uint),
        # Command name of the calling task.
        ('comm', ctypes.c_char * TASK_COMM_LEN),
        # Command name of the caller's real_parent task.
        ('pcomm', ctypes.c_char * TASK_COMM_LEN),
        # tgid of the caller's real_parent (declared pid_t on the C side).
        ('ppid', ctypes.c_uint),
        # Raw flags argument of the syscall.
        ('flags', ctypes.c_ulong),
    ]
276
277
class DataUnion(ctypes.Union):
    """ctypes view of the anonymous union inside `struct data_t`.

    Which member is meaningful depends on the record's event type.
    """
    _fields_ = [
        # EVENT_MOUNT, EVENT_UMOUNT
        ('enter', EnterData),
        # EVENT_MOUNT_SOURCE/TARGET/TYPE/DATA and EVENT_UMOUNT_TARGET
        ('str', ctypes.c_char * MAX_STR_LEN),
        # EVENT_MOUNT_RET, EVENT_UMOUNT_RET
        ('retval', ctypes.c_int),
    ]
284
285
class Event(ctypes.Structure):
    """ctypes view of `struct data_t`, the record read from the perf buffer."""
    _fields_ = [
        # One of the EventType values.
        ('type', ctypes.c_uint),
        # Low 32 bits of bpf_get_current_pid_tgid(); shown in the TID column.
        ('pid', ctypes.c_uint),
        # High 32 bits of bpf_get_current_pid_tgid(); shown in the PID column.
        ('tgid', ctypes.c_uint),
        # Payload; interpret according to `type`.
        ('union', DataUnion),
    ]
293
294
def _decode_flags(flags, flag_list):
    """Split a flag word into the names of the bits that are set.

    flags:     integer flag word to decode.
    flag_list: iterable of (name, bit) pairs to look for.

    Returns a list of names in flag_list order. Any bits not covered by
    flag_list are appended as one hex literal; if nothing at all matched,
    the list holds only that hex literal (e.g. ['0x0']).
    """
    names = []
    leftover = flags
    for name, mask in flag_list:
        if leftover & mask:
            names.append(name)
            # Clear what was just reported so only unknown bits remain.
            leftover &= ~mask
    if leftover or not names:
        names.append('0x{:x}'.format(leftover))
    return names
304
305
def decode_flags(flags, flag_list):
    """Render `flags` as a '|'-separated string of names from `flag_list`.

    Bits that flag_list does not name show up as a trailing hex literal.
    """
    names = _decode_flags(flags, flag_list)
    return '|'.join(names)
308
309
def decode_mount_flags(flags):
    """Render the mount() flags argument as a '|'-separated string.

    If the top 16 bits hold the legacy magic MS_MGC_VAL, it is reported as
    'MS_MGC_VAL' and stripped before the remaining bits are decoded against
    MOUNT_FLAGS. Bits with no known name are shown as a hex literal.
    """
    str_flags = []
    if flags & MS_MGC_MSK == MS_MGC_VAL:
        flags &= ~MS_MGC_MSK
        str_flags.append('MS_MGC_VAL')
        if not flags:
            # Nothing but the magic was passed. Falling through would
            # append the helper's "no flags" placeholder and print a
            # misleading 'MS_MGC_VAL|0x0'.
            return str_flags[0]
    str_flags.extend(_decode_flags(flags, MOUNT_FLAGS))
    return '|'.join(str_flags)
317
318
def decode_umount_flags(flags):
    """Render the umount() flags argument using the UMOUNT_FLAGS table."""
    names = _decode_flags(flags, UMOUNT_FLAGS)
    return '|'.join(names)
321
322
def decode_errno(retval):
    """Format a syscall return value for display.

    A negative value whose magnitude is a known errno becomes '-ENAME'
    (e.g. -2 -> '-ENOENT'); anything else is returned as its decimal string.
    """
    name = errno.errorcode.get(-retval)
    if name is None:
        return str(retval)
    return '-' + name
328
329
# Byte value -> C-style escape sequence, for the characters that have a
# short backslash form (plus the double quote and the backslash itself).
_escape_chars = dict(
    (ord(raw), escaped) for raw, escaped in (
        ('\a', '\\a'),
        ('\b', '\\b'),
        ('\t', '\\t'),
        ('\n', '\\n'),
        ('\v', '\\v'),
        ('\f', '\\f'),
        ('\r', '\\r'),
        ('"', '\\"'),
        ('\\', '\\\\'),
    )
)
341
342
def escape_character(c):
    """Return the printable representation of byte value `c` (an int).

    Characters listed in _escape_chars use their backslash form, other
    printable ASCII (0x20-0x7e) is returned as-is, and everything else is
    rendered as a two-digit \\xNN escape.
    """
    short_form = _escape_chars.get(c)
    if short_form is not None:
        return short_form
    if 0x20 <= c <= 0x7e:
        return chr(c)
    return '\\x{:02x}'.format(c)
351
352
def decode_mount_string(s):
    """Return byte string `s` double-quoted, with special bytes escaped."""
    if sys.version_info.major < 3:
        # Python 2: iterating a str yields 1-character strings.
        codes = (ord(ch) for ch in s)
    else:
        # Python 3: iterating bytes already yields integers.
        codes = s
    escaped = ''.join(escape_character(code) for code in codes)
    return '"{}"'.format(escaped)
359
360
def _enter_record(event):
    """Copy the fields of an EVENT_MOUNT / EVENT_UMOUNT record into a dict."""
    enter = event.union.enter
    return {
        'pid': event.pid,
        'tgid': event.tgid,
        'mnt_ns': enter.mnt_ns,
        'comm': enter.comm,
        'flags': enter.flags,
        'ppid': enter.ppid,
        'pcomm': enter.pcomm,
    }


def print_event(mounts, umounts, parent, cpu, data, size):
    """Perf-buffer callback: collect partial records and print finished calls.

    mounts, umounts: dicts of in-flight mount()/umount() calls, keyed by the
                     record's `pid` field; updated in place.
    parent:          when true, also print the PCOMM and PPID columns.
    cpu, size:       supplied by the perf buffer; unused here.
    data:            pointer to the raw record, read as an Event.

    Entry and string records are stored until the matching *_RET record
    arrives; then the call is formatted, printed, and removed from the dict.
    A KeyError (a piece of the call is missing, e.g. because an event was
    lost) silently drops the record.
    """
    event = ctypes.cast(data, ctypes.POINTER(Event)).contents
    kind = event.type
    tid = event.pid

    try:
        if kind == EventType.EVENT_MOUNT:
            mounts[tid] = _enter_record(event)
        elif kind == EventType.EVENT_MOUNT_SOURCE:
            mounts[tid]['source'] = event.union.str
        elif kind == EventType.EVENT_MOUNT_TARGET:
            mounts[tid]['target'] = event.union.str
        elif kind == EventType.EVENT_MOUNT_TYPE:
            mounts[tid]['type'] = event.union.str
        elif kind == EventType.EVENT_MOUNT_DATA:
            # XXX: data is not always a NUL-terminated string
            mounts[tid]['data'] = event.union.str
        elif kind == EventType.EVENT_UMOUNT:
            umounts[tid] = _enter_record(event)
        elif kind == EventType.EVENT_UMOUNT_TARGET:
            umounts[tid]['target'] = event.union.str
        elif kind in (EventType.EVENT_MOUNT_RET, EventType.EVENT_UMOUNT_RET):
            if kind == EventType.EVENT_MOUNT_RET:
                # Remove the entry first so an incomplete call is discarded
                # even when formatting below hits a missing key.
                syscall = mounts.pop(tid)
                template = ('mount({source}, {target}, {type}, {flags}, '
                            '{data}) = {retval}')
                call = template.format(
                    source=decode_mount_string(syscall['source']),
                    target=decode_mount_string(syscall['target']),
                    type=decode_mount_string(syscall['type']),
                    flags=decode_mount_flags(syscall['flags']),
                    data=decode_mount_string(syscall['data']),
                    retval=decode_errno(event.union.retval))
            else:
                syscall = umounts.pop(tid)
                template = 'umount({target}, {flags}) = {retval}'
                call = template.format(
                    target=decode_mount_string(syscall['target']),
                    flags=decode_umount_flags(syscall['flags']),
                    retval=decode_errno(event.union.retval))

            comm = syscall['comm'].decode('utf-8', 'replace')
            if parent:
                pcomm = syscall['pcomm'].decode('utf-8', 'replace')
                line = '{:16} {:<7} {:<7} {:16} {:<7} {:<11} {}'.format(
                    comm, syscall['tgid'], syscall['pid'], pcomm,
                    syscall['ppid'], syscall['mnt_ns'], call)
            else:
                line = '{:16} {:<7} {:<7} {:<11} {}'.format(
                    comm, syscall['tgid'], syscall['pid'],
                    syscall['mnt_ns'], call)
            print(line)
        sys.stdout.flush()
    except KeyError:
        # This might happen if we lost an event.
        pass
427
428
def main():
    """Parse options, load the BPF program, and print mount/umount calls.

    Attaches the entry and return probes for the mount and umount syscalls,
    prints the column header, then polls the perf buffer until interrupted
    with Ctrl-C. With --ebpf the generated program text is printed and the
    process exits without loading anything.
    """
    parser = argparse.ArgumentParser(
        description='trace mount() and umount() syscalls'
    )
    parser.add_argument("--ebpf", action="store_true",
        help=argparse.SUPPRESS)
    parser.add_argument("-P", "--parent_process", action="store_true",
        help="also snoop the parent process")
    parser.add_argument("--cgroupmap",
        help="trace cgroups in this BPF map only")
    parser.add_argument("--mntnsmap",
        help="trace mount namespaces in this BPF map only")
    args = parser.parse_args()

    # In-flight calls, filled in by print_event() as records arrive.
    mounts = {}
    umounts = {}

    # Build the final program in a local instead of rebinding the module-level
    # bpf_text, so calling main() again cannot prepend the filter code twice.
    program = filter_by_containers(args) + bpf_text
    if args.ebpf:
        print(program)
        # sys.exit() rather than the site-provided exit(), which is meant for
        # the interactive shell and is absent when site is not loaded.
        sys.exit()

    b = bcc.BPF(text=program)
    mount_fnname = b.get_syscall_fnname("mount")
    b.attach_kprobe(event=mount_fnname, fn_name="syscall__mount")
    b.attach_kretprobe(event=mount_fnname, fn_name="do_ret_sys_mount")
    umount_fnname = b.get_syscall_fnname("umount")
    b.attach_kprobe(event=umount_fnname, fn_name="syscall__umount")
    b.attach_kretprobe(event=umount_fnname, fn_name="do_ret_sys_umount")
    b['events'].open_perf_buffer(
        functools.partial(print_event, mounts, umounts, args.parent_process))

    if args.parent_process:
        print('{:16} {:<7} {:<7} {:16} {:<7} {:<11} {}'.format(
              'COMM', 'PID', 'TID', 'PCOMM', 'PPID', 'MNT_NS', 'CALL'))
    else:
        print('{:16} {:<7} {:<7} {:<11} {}'.format(
            'COMM', 'PID', 'TID', 'MNT_NS', 'CALL'))

    while True:
        try:
            b.perf_buffer_poll()
        except KeyboardInterrupt:
            sys.exit()
472
473
474
# Start tracing only when run as a script, not when imported as a module.
if __name__ == '__main__':
    main()
477