#!/usr/bin/env python
#
# mountsnoop Trace mount() and umount syscalls.
#            For Linux, uses BCC, eBPF. Embedded C.
#
# USAGE: mountsnoop [-h] [-P] [--cgroupmap CGROUPMAP] [--mntnsmap MNTNSMAP]
#
# Copyright (c) 2016 Facebook, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 14-Oct-2016   Omar Sandoval   Created this.

from __future__ import print_function
import argparse
import bcc
from bcc.containers import filter_by_containers
import ctypes
import errno
import functools
import sys


# eBPF program. Each syscall entry probe emits one "enter" event followed by
# one event per string argument; the return probe emits the return value.
# print_event() below stitches these back together per thread.
bpf_text = r"""
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>

#include <linux/nsproxy.h>
#include <linux/ns_common.h>

/*
 * XXX: struct mnt_namespace is defined in fs/mount.h, which is private to the
 * VFS and not installed in any kernel-devel packages. So, let's duplicate the
 * important part of the definition. There are actually more members in the
 * real struct, but we don't need them, and they're more likely to change.
 *
 * To add support for --selector option, we need to call filter_by_containers().
 * But this function adds code which defines struct mnt_namespace.
 * To avoid having this structure twice, we define MNT_NAMESPACE_DEFINED in
 * filter_by_containers(), then here we check if macro is already defined before
 * adding struct definition.
 */
#ifndef MNT_NAMESPACE_DEFINED
struct mnt_namespace {
    // This field was removed in https://github.com/torvalds/linux/commit/1a7b8969e664d6af328f00fe6eb7aabd61a71d13
    #if LINUX_VERSION_CODE < KERNEL_VERSION(5, 11, 0)
    atomic_t count;
    #endif
    struct ns_common ns;
};
#endif /* !MNT_NAMESPACE_DEFINED */

/*
 * XXX: this could really use first-class string support in BPF. target is a
 * NUL-terminated path up to PATH_MAX in length. source and type are
 * NUL-terminated strings up to PAGE_SIZE in length. data is a weird case: it's
 * almost always a NUL-terminated string, but for some filesystems (e.g., older
 * NFS variants), it's a binary structure with plenty of NUL bytes, so the
 * kernel always copies up to PAGE_SIZE bytes, stopping when it hits a fault.
 *
 * The best we can do with the existing BPF helpers is to copy as much of each
 * argument as we can. Our stack space is limited, and we need to leave some
 * headroom for the rest of the function, so this should be a decent value.
 */
#define MAX_STR_LEN 412

enum event_type {
    EVENT_MOUNT,
    EVENT_MOUNT_SOURCE,
    EVENT_MOUNT_TARGET,
    EVENT_MOUNT_TYPE,
    EVENT_MOUNT_DATA,
    EVENT_MOUNT_RET,
    EVENT_UMOUNT,
    EVENT_UMOUNT_TARGET,
    EVENT_UMOUNT_RET,
};

struct data_t {
    enum event_type type;
    pid_t pid, tgid;
    union {
        /* EVENT_MOUNT, EVENT_UMOUNT */
        struct {
            /* current->nsproxy->mnt_ns->ns.inum */
            unsigned int mnt_ns;
            char comm[TASK_COMM_LEN];
            char pcomm[TASK_COMM_LEN];
            pid_t ppid;
            unsigned long flags;
        } enter;
        /*
         * EVENT_MOUNT_SOURCE, EVENT_MOUNT_TARGET, EVENT_MOUNT_TYPE,
         * EVENT_MOUNT_DATA, EVENT_UMOUNT_TARGET
         */
        char str[MAX_STR_LEN];
        /* EVENT_MOUNT_RET, EVENT_UMOUNT_RET */
        int retval;
    };
};

BPF_PERF_OUTPUT(events);

int syscall__mount(struct pt_regs *ctx, char __user *source,
                   char __user *target, char __user *type,
                   unsigned long flags, char __user *data)
{
    struct data_t event = {};
    struct task_struct *task;
    struct nsproxy *nsproxy;
    struct mnt_namespace *mnt_ns;

    if (container_should_be_filtered()) {
        return 0;
    }

    event.pid = bpf_get_current_pid_tgid() & 0xffffffff;
    event.tgid = bpf_get_current_pid_tgid() >> 32;

    event.type = EVENT_MOUNT;
    bpf_get_current_comm(event.enter.comm, sizeof(event.enter.comm));
    event.enter.flags = flags;
    task = (struct task_struct *)bpf_get_current_task();
    event.enter.ppid = task->real_parent->tgid;
    bpf_probe_read_kernel_str(&event.enter.pcomm, TASK_COMM_LEN, task->real_parent->comm);
    nsproxy = task->nsproxy;
    mnt_ns = nsproxy->mnt_ns;
    event.enter.mnt_ns = mnt_ns->ns.inum;
    events.perf_submit(ctx, &event, sizeof(event));

    event.type = EVENT_MOUNT_SOURCE;
    __builtin_memset(event.str, 0, sizeof(event.str));
    bpf_probe_read_user(event.str, sizeof(event.str), source);
    events.perf_submit(ctx, &event, sizeof(event));

    event.type = EVENT_MOUNT_TARGET;
    __builtin_memset(event.str, 0, sizeof(event.str));
    bpf_probe_read_user(event.str, sizeof(event.str), target);
    events.perf_submit(ctx, &event, sizeof(event));

    event.type = EVENT_MOUNT_TYPE;
    __builtin_memset(event.str, 0, sizeof(event.str));
    bpf_probe_read_user(event.str, sizeof(event.str), type);
    events.perf_submit(ctx, &event, sizeof(event));

    event.type = EVENT_MOUNT_DATA;
    __builtin_memset(event.str, 0, sizeof(event.str));
    bpf_probe_read_user(event.str, sizeof(event.str), data);
    events.perf_submit(ctx, &event, sizeof(event));

    return 0;
}

int do_ret_sys_mount(struct pt_regs *ctx)
{
    struct data_t event = {};

    event.type = EVENT_MOUNT_RET;
    event.pid = bpf_get_current_pid_tgid() & 0xffffffff;
    event.tgid = bpf_get_current_pid_tgid() >> 32;
    event.retval = PT_REGS_RC(ctx);
    events.perf_submit(ctx, &event, sizeof(event));

    return 0;
}

int syscall__umount(struct pt_regs *ctx, char __user *target, int flags)
{
    struct data_t event = {};
    struct task_struct *task;
    struct nsproxy *nsproxy;
    struct mnt_namespace *mnt_ns;

    if (container_should_be_filtered()) {
        return 0;
    }

    event.pid = bpf_get_current_pid_tgid() & 0xffffffff;
    event.tgid = bpf_get_current_pid_tgid() >> 32;

    event.type = EVENT_UMOUNT;
    bpf_get_current_comm(event.enter.comm, sizeof(event.enter.comm));
    event.enter.flags = flags;
    task = (struct task_struct *)bpf_get_current_task();
    event.enter.ppid = task->real_parent->tgid;
    bpf_probe_read_kernel_str(&event.enter.pcomm, TASK_COMM_LEN, task->real_parent->comm);
    nsproxy = task->nsproxy;
    mnt_ns = nsproxy->mnt_ns;
    event.enter.mnt_ns = mnt_ns->ns.inum;
    events.perf_submit(ctx, &event, sizeof(event));

    event.type = EVENT_UMOUNT_TARGET;
    __builtin_memset(event.str, 0, sizeof(event.str));
    bpf_probe_read_user(event.str, sizeof(event.str), target);
    events.perf_submit(ctx, &event, sizeof(event));

    return 0;
}

int do_ret_sys_umount(struct pt_regs *ctx)
{
    struct data_t event = {};

    event.type = EVENT_UMOUNT_RET;
    event.pid = bpf_get_current_pid_tgid() & 0xffffffff;
    event.tgid = bpf_get_current_pid_tgid() >> 32;
    event.retval = PT_REGS_RC(ctx);
    events.perf_submit(ctx, &event, sizeof(event));

    return 0;
}
"""

# sys/mount.h
MS_MGC_VAL = 0xc0ed0000
MS_MGC_MSK = 0xffff0000
MOUNT_FLAGS = [
    ('MS_RDONLY', 1),
    ('MS_NOSUID', 2),
    ('MS_NODEV', 4),
    ('MS_NOEXEC', 8),
    ('MS_SYNCHRONOUS', 16),
    ('MS_REMOUNT', 32),
    ('MS_MANDLOCK', 64),
    ('MS_DIRSYNC', 128),
    # Previously missing, so this bit was printed as a raw 0x100.
    ('MS_NOSYMFOLLOW', 256),
    ('MS_NOATIME', 1024),
    ('MS_NODIRATIME', 2048),
    ('MS_BIND', 4096),
    ('MS_MOVE', 8192),
    ('MS_REC', 16384),
    ('MS_SILENT', 32768),
    ('MS_POSIXACL', 1 << 16),
    ('MS_UNBINDABLE', 1 << 17),
    ('MS_PRIVATE', 1 << 18),
    ('MS_SLAVE', 1 << 19),
    ('MS_SHARED', 1 << 20),
    ('MS_RELATIME', 1 << 21),
    ('MS_KERNMOUNT', 1 << 22),
    ('MS_I_VERSION', 1 << 23),
    ('MS_STRICTATIME', 1 << 24),
    ('MS_LAZYTIME', 1 << 25),
    ('MS_ACTIVE', 1 << 30),
    ('MS_NOUSER', 1 << 31),
]
UMOUNT_FLAGS = [
    ('MNT_FORCE', 1),
    ('MNT_DETACH', 2),
    ('MNT_EXPIRE', 4),
    ('UMOUNT_NOFOLLOW', 8),
]


TASK_COMM_LEN = 16  # linux/sched.h
MAX_STR_LEN = 412  # must match MAX_STR_LEN in bpf_text

# Output layouts, shared by the header line and every event line so the
# columns cannot drift apart.
_ROW_FORMAT = '{:16} {:<7} {:<7} {:<11} {}'
_ROW_FORMAT_PARENT = '{:16} {:<7} {:<7} {:16} {:<7} {:<11} {}'


class EventType(object):
    """Python mirror of ``enum event_type`` in bpf_text (same order)."""
    EVENT_MOUNT = 0
    EVENT_MOUNT_SOURCE = 1
    EVENT_MOUNT_TARGET = 2
    EVENT_MOUNT_TYPE = 3
    EVENT_MOUNT_DATA = 4
    EVENT_MOUNT_RET = 5
    EVENT_UMOUNT = 6
    EVENT_UMOUNT_TARGET = 7
    EVENT_UMOUNT_RET = 8


class EnterData(ctypes.Structure):
    """ctypes mirror of the ``enter`` member of ``struct data_t``."""
    _fields_ = [
        ('mnt_ns', ctypes.c_uint),
        ('comm', ctypes.c_char * TASK_COMM_LEN),
        ('pcomm', ctypes.c_char * TASK_COMM_LEN),
        ('ppid', ctypes.c_uint),
        ('flags', ctypes.c_ulong),
    ]


class DataUnion(ctypes.Union):
    """ctypes mirror of the anonymous union in ``struct data_t``."""
    _fields_ = [
        ('enter', EnterData),
        ('str', ctypes.c_char * MAX_STR_LEN),
        ('retval', ctypes.c_int),
    ]


class Event(ctypes.Structure):
    """ctypes mirror of ``struct data_t`` as submitted by the BPF program."""
    _fields_ = [
        ('type', ctypes.c_uint),
        ('pid', ctypes.c_uint),
        ('tgid', ctypes.c_uint),
        ('union', DataUnion),
    ]


def _decode_flags(flags, flag_list):
    """Return the names of the bits of *flags* found in *flag_list*.

    *flag_list* is a sequence of ``(name, bit)`` pairs. Bits that are set but
    not named are reported as one trailing hex string. If no named bit is set
    the hex string is emitted as well, so the result is never empty.
    """
    str_flags = []
    for flag, bit in flag_list:
        if flags & bit:
            str_flags.append(flag)
            flags &= ~bit
    if flags or not str_flags:
        str_flags.append('0x{:x}'.format(flags))
    return str_flags


def decode_flags(flags, flag_list):
    """Return *flags* as a ``|``-separated string of names from *flag_list*."""
    return '|'.join(_decode_flags(flags, flag_list))


def decode_mount_flags(flags):
    """Return mount(2) *flags* as a ``|``-separated string of MS_* names.

    If the upper 16 bits equal the legacy magic ``MS_MGC_VAL`` they are
    reported as ``MS_MGC_VAL`` and removed before the remaining bits are
    decoded.
    """
    str_flags = []
    if flags & MS_MGC_MSK == MS_MGC_VAL:
        flags &= ~MS_MGC_MSK
        str_flags.append('MS_MGC_VAL')
    # When the magic was the only thing set, skip the generic decoder: it
    # would append its "nothing decoded" placeholder and yield
    # "MS_MGC_VAL|0x0".
    if flags or not str_flags:
        str_flags.extend(_decode_flags(flags, MOUNT_FLAGS))
    return '|'.join(str_flags)


def decode_umount_flags(flags):
    """Return umount2(2) *flags* as a ``|``-separated string of names."""
    return decode_flags(flags, UMOUNT_FLAGS)


def decode_errno(retval):
    """Return *retval* as ``-ENAME`` if ``-retval`` is a known errno.

    Any other value (including 0) is returned as its decimal string.
    """
    try:
        return '-' + errno.errorcode[-retval]
    except KeyError:
        return str(retval)


# Byte value -> C-style escape sequence used when quoting string arguments.
_escape_chars = {
    ord('\a'): '\\a',
    ord('\b'): '\\b',
    ord('\t'): '\\t',
    ord('\n'): '\\n',
    ord('\v'): '\\v',
    ord('\f'): '\\f',
    ord('\r'): '\\r',
    ord('"'): '\\"',
    ord('\\'): '\\\\',
}


def escape_character(c):
    """Return the printable form of byte value *c* (an int).

    Bytes with a named escape use it, other printable ASCII is returned
    unchanged, and everything else becomes ``\\xNN``.
    """
    try:
        return _escape_chars[c]
    except KeyError:
        if 0x20 <= c <= 0x7e:
            return chr(c)
        else:
            return '\\x{:02x}'.format(c)


# Iterating a byte string yields 1-char strings on Python 2 but ints on
# Python 3, hence the two definitions.
if sys.version_info.major < 3:
    def decode_mount_string(s):
        """Return byte string *s* escaped and wrapped in double quotes."""
        return '"{}"'.format(''.join(escape_character(ord(c)) for c in s))
else:
    def decode_mount_string(s):
        """Return byte string *s* escaped and wrapped in double quotes."""
        return '"{}"'.format(''.join(escape_character(c) for c in s))


def _enter_record(event):
    """Build the per-syscall record from an EVENT_MOUNT/EVENT_UMOUNT event."""
    enter = event.union.enter
    return {
        'pid': event.pid,
        'tgid': event.tgid,
        'mnt_ns': enter.mnt_ns,
        'comm': enter.comm,
        'flags': enter.flags,
        'ppid': enter.ppid,
        'pcomm': enter.pcomm,
    }


def print_event(mounts, umounts, parent, cpu, data, size):
    """Perf-buffer callback: collect one event and print completed syscalls.

    The BPF program splits every syscall into several events. The pieces are
    accumulated in *mounts* / *umounts*, keyed by ``event.pid`` (the lower
    32 bits of bpf_get_current_pid_tgid(), shown in the TID column), until
    the matching *_RET event arrives, at which point the full call is
    printed and the record is removed.

    mounts, umounts -- dicts holding in-flight mount / umount records
    parent          -- if true, also print the PCOMM and PPID columns
    cpu, size       -- supplied by the perf buffer; unused here
    data            -- pointer to the raw event, read as an Event
    """
    event = ctypes.cast(data, ctypes.POINTER(Event)).contents

    try:
        if event.type == EventType.EVENT_MOUNT:
            mounts[event.pid] = _enter_record(event)
        elif event.type == EventType.EVENT_MOUNT_SOURCE:
            mounts[event.pid]['source'] = event.union.str
        elif event.type == EventType.EVENT_MOUNT_TARGET:
            mounts[event.pid]['target'] = event.union.str
        elif event.type == EventType.EVENT_MOUNT_TYPE:
            mounts[event.pid]['type'] = event.union.str
        elif event.type == EventType.EVENT_MOUNT_DATA:
            # XXX: data is not always a NUL-terminated string
            mounts[event.pid]['data'] = event.union.str
        elif event.type == EventType.EVENT_UMOUNT:
            umounts[event.pid] = _enter_record(event)
        elif event.type == EventType.EVENT_UMOUNT_TARGET:
            umounts[event.pid]['target'] = event.union.str
        elif (event.type == EventType.EVENT_MOUNT_RET or
              event.type == EventType.EVENT_UMOUNT_RET):
            if event.type == EventType.EVENT_MOUNT_RET:
                syscall = mounts.pop(event.pid)
                call = ('mount({source}, {target}, {type}, {flags}, {data}) ' +
                        '= {retval}').format(
                    source=decode_mount_string(syscall['source']),
                    target=decode_mount_string(syscall['target']),
                    type=decode_mount_string(syscall['type']),
                    flags=decode_mount_flags(syscall['flags']),
                    data=decode_mount_string(syscall['data']),
                    retval=decode_errno(event.union.retval))
            else:
                syscall = umounts.pop(event.pid)
                call = 'umount({target}, {flags}) = {retval}'.format(
                    target=decode_mount_string(syscall['target']),
                    flags=decode_umount_flags(syscall['flags']),
                    retval=decode_errno(event.union.retval))
            if parent:
                print(_ROW_FORMAT_PARENT.format(
                    syscall['comm'].decode('utf-8', 'replace'), syscall['tgid'],
                    syscall['pid'], syscall['pcomm'].decode('utf-8', 'replace'),
                    syscall['ppid'], syscall['mnt_ns'], call))
            else:
                print(_ROW_FORMAT.format(
                    syscall['comm'].decode('utf-8', 'replace'), syscall['tgid'],
                    syscall['pid'], syscall['mnt_ns'], call))
            sys.stdout.flush()
    except KeyError:
        # This might happen if we lost an event.
        pass


def main():
    """Parse arguments, load the BPF program and print events until ^C."""
    parser = argparse.ArgumentParser(
        description='trace mount() and umount() syscalls'
    )
    parser.add_argument("--ebpf", action="store_true",
                        help=argparse.SUPPRESS)
    parser.add_argument("-P", "--parent_process", action="store_true",
                        help="also snoop the parent process")
    parser.add_argument("--cgroupmap",
                        help="trace cgroups in this BPF map only")
    parser.add_argument("--mntnsmap",
                        help="trace mount namespaces in this BPF map only")
    args = parser.parse_args()

    mounts = {}
    umounts = {}
    # Build the final program in a local instead of rewriting the module
    # global, so calling main() again does not prepend the filter twice.
    program_text = filter_by_containers(args) + bpf_text
    if args.ebpf:
        print(program_text)
        sys.exit()
    b = bcc.BPF(text=program_text)
    mount_fnname = b.get_syscall_fnname("mount")
    b.attach_kprobe(event=mount_fnname, fn_name="syscall__mount")
    b.attach_kretprobe(event=mount_fnname, fn_name="do_ret_sys_mount")
    umount_fnname = b.get_syscall_fnname("umount")
    b.attach_kprobe(event=umount_fnname, fn_name="syscall__umount")
    b.attach_kretprobe(event=umount_fnname, fn_name="do_ret_sys_umount")
    b['events'].open_perf_buffer(
        functools.partial(print_event, mounts, umounts, args.parent_process))

    if args.parent_process:
        print(_ROW_FORMAT_PARENT.format(
            'COMM', 'PID', 'TID', 'PCOMM', 'PPID', 'MNT_NS', 'CALL'))
    else:
        print(_ROW_FORMAT.format(
            'COMM', 'PID', 'TID', 'MNT_NS', 'CALL'))

    while True:
        try:
            b.perf_buffer_poll()
        except KeyboardInterrupt:
            sys.exit()


if __name__ == '__main__':
    main()