1#!/usr/bin/env python 2# 3# tcpv4tracer Trace TCP connections. 4# For Linux, uses BCC, eBPF. Embedded C. 5# 6# USAGE: tcpv4tracer [-h] [-v] [-p PID] [-N NETNS] [-4 | -6] 7# 8# You should generally try to avoid writing long scripts that measure multiple 9# functions and walk multiple kernel structures, as they will be a burden to 10# maintain as the kernel changes. 11# The following code should be replaced, and simplified, when static TCP probes 12# exist. 13# 14# Copyright 2017-2020 Kinvolk GmbH 15# 16# Licensed under the Apache License, Version 2.0 (the "License") 17from __future__ import print_function 18from bcc import BPF 19from bcc.containers import filter_by_containers 20 21import argparse as ap 22from socket import inet_ntop, AF_INET, AF_INET6 23from struct import pack 24 25parser = ap.ArgumentParser(description="Trace TCP connections", 26 formatter_class=ap.RawDescriptionHelpFormatter) 27parser.add_argument("-t", "--timestamp", action="store_true", 28 help="include timestamp on output") 29parser.add_argument("-p", "--pid", default=0, type=int, 30 help="trace this PID only") 31parser.add_argument("-N", "--netns", default=0, type=int, 32 help="trace this Network Namespace only") 33parser.add_argument("--cgroupmap", 34 help="trace cgroups in this BPF map only") 35parser.add_argument("--mntnsmap", 36 help="trace mount namespaces in this BPF map only") 37group = parser.add_mutually_exclusive_group() 38group.add_argument("-4", "--ipv4", action="store_true", 39 help="trace IPv4 family only") 40group.add_argument("-6", "--ipv6", action="store_true", 41 help="trace IPv6 family only") 42parser.add_argument("-v", "--verbose", action="store_true", 43 help="include Network Namespace in the output") 44parser.add_argument("--ebpf", action="store_true", 45 help=ap.SUPPRESS) 46args = parser.parse_args() 47 48bpf_text = """ 49#include <uapi/linux/ptrace.h> 50#pragma clang diagnostic push 51#pragma clang diagnostic ignored "-Wtautological-compare" 52#include <net/sock.h> 53#pragma clang diagnostic pop 54#include <net/inet_sock.h> 55#include <net/net_namespace.h> 56#include <bcc/proto.h> 57 58#define TCP_EVENT_TYPE_CONNECT 1 59#define TCP_EVENT_TYPE_ACCEPT 2 60#define TCP_EVENT_TYPE_CLOSE 3 61 62struct tcp_ipv4_event_t { 63 u64 ts_ns; 64 u32 type; 65 u32 pid; 66 char comm[TASK_COMM_LEN]; 67 u8 ip; 68 u32 saddr; 69 u32 daddr; 70 u16 sport; 71 u16 dport; 72 u32 netns; 73}; 74BPF_PERF_OUTPUT(tcp_ipv4_event); 75 76struct tcp_ipv6_event_t { 77 u64 ts_ns; 78 u32 type; 79 u32 pid; 80 char comm[TASK_COMM_LEN]; 81 unsigned __int128 saddr; 82 unsigned __int128 daddr; 83 u16 sport; 84 u16 dport; 85 u32 netns; 86 u8 ip; 87}; 88BPF_PERF_OUTPUT(tcp_ipv6_event); 89 90// tcp_set_state doesn't run in the context of the process that initiated the 91// connection so we need to store a map TUPLE -> PID to send the right PID on 92// the event 93struct ipv4_tuple_t { 94 u32 saddr; 95 u32 daddr; 96 u16 sport; 97 u16 dport; 98 u32 netns; 99}; 100 101struct ipv6_tuple_t { 102 unsigned __int128 saddr; 103 unsigned __int128 daddr; 104 u16 sport; 105 u16 dport; 106 u32 netns; 107}; 108 109struct pid_comm_t { 110 u64 pid; 111 char comm[TASK_COMM_LEN]; 112}; 113 114BPF_HASH(tuplepid_ipv4, struct ipv4_tuple_t, struct pid_comm_t); 115BPF_HASH(tuplepid_ipv6, struct ipv6_tuple_t, struct pid_comm_t); 116 117BPF_HASH(connectsock, u64, struct sock *); 118 119static int read_ipv4_tuple(struct ipv4_tuple_t *tuple, struct sock *skp) 120{ 121 u32 net_ns_inum = 0; 122 u32 saddr = skp->__sk_common.skc_rcv_saddr; 123 u32 daddr = skp->__sk_common.skc_daddr; 124 struct inet_sock *sockp = (struct inet_sock *)skp; 125 u16 sport = sockp->inet_sport; 126 u16 dport = skp->__sk_common.skc_dport; 127#ifdef CONFIG_NET_NS 128 net_ns_inum = skp->__sk_common.skc_net.net->ns.inum; 129#endif 130 131 ##FILTER_NETNS## 132 133 tuple->saddr = saddr; 134 tuple->daddr = daddr; 135 tuple->sport = sport; 136 tuple->dport = dport; 137 tuple->netns = net_ns_inum; 138 139 // if addresses or ports are 0, ignore 140 if (saddr == 0 || daddr == 0 || sport == 0 || dport == 0) { 141 return 0; 142 } 143 144 return 1; 145} 146 147static int read_ipv6_tuple(struct ipv6_tuple_t *tuple, struct sock *skp) 148{ 149 u32 net_ns_inum = 0; 150 unsigned __int128 saddr = 0, daddr = 0; 151 struct inet_sock *sockp = (struct inet_sock *)skp; 152 u16 sport = sockp->inet_sport; 153 u16 dport = skp->__sk_common.skc_dport; 154#ifdef CONFIG_NET_NS 155 net_ns_inum = skp->__sk_common.skc_net.net->ns.inum; 156#endif 157 bpf_probe_read_kernel(&saddr, sizeof(saddr), 158 skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32); 159 bpf_probe_read_kernel(&daddr, sizeof(daddr), 160 skp->__sk_common.skc_v6_daddr.in6_u.u6_addr32); 161 162 ##FILTER_NETNS## 163 164 tuple->saddr = saddr; 165 tuple->daddr = daddr; 166 tuple->sport = sport; 167 tuple->dport = dport; 168 tuple->netns = net_ns_inum; 169 170 // if addresses or ports are 0, ignore 171 if (saddr == 0 || daddr == 0 || sport == 0 || dport == 0) { 172 return 0; 173 } 174 175 return 1; 176} 177 178static bool check_family(struct sock *sk, u16 expected_family) { 179 u64 zero = 0; 180 u16 family = sk->__sk_common.skc_family; 181 return family == expected_family; 182} 183 184int trace_connect_v4_entry(struct pt_regs *ctx, struct sock *sk) 185{ 186 if (container_should_be_filtered()) { 187 return 0; 188 } 189 190 u64 pid = bpf_get_current_pid_tgid(); 191 192 ##FILTER_PID## 193 194 u16 family = sk->__sk_common.skc_family; 195 ##FILTER_FAMILY## 196 197 198 // stash the sock ptr for lookup on return 199 connectsock.update(&pid, &sk); 200 201 return 0; 202} 203 204int trace_connect_v4_return(struct pt_regs *ctx) 205{ 206 int ret = PT_REGS_RC(ctx); 207 u64 pid = bpf_get_current_pid_tgid(); 208 209 struct sock **skpp; 210 skpp = connectsock.lookup(&pid); 211 if (skpp == 0) { 212 return 0; // missed entry 213 } 214 215 connectsock.delete(&pid); 216 217 if (ret != 0) { 218 // failed to send SYNC packet, may not have populated 219 // socket __sk_common.{skc_rcv_saddr, ...} 220 return 0; 221 } 222 223 // pull in details 224 struct sock *skp = *skpp; 225 struct ipv4_tuple_t t = { }; 226 if (!read_ipv4_tuple(&t, skp)) { 227 return 0; 228 } 229 230 struct pid_comm_t p = { }; 231 p.pid = pid; 232 bpf_get_current_comm(&p.comm, sizeof(p.comm)); 233 234 tuplepid_ipv4.update(&t, &p); 235 236 return 0; 237} 238 239int trace_connect_v6_entry(struct pt_regs *ctx, struct sock *sk) 240{ 241 if (container_should_be_filtered()) { 242 return 0; 243 } 244 u64 pid = bpf_get_current_pid_tgid(); 245 246 ##FILTER_PID## 247 u16 family = sk->__sk_common.skc_family; 248 ##FILTER_FAMILY## 249 250 // stash the sock ptr for lookup on return 251 connectsock.update(&pid, &sk); 252 253 return 0; 254} 255 256int trace_connect_v6_return(struct pt_regs *ctx) 257{ 258 int ret = PT_REGS_RC(ctx); 259 u64 pid = bpf_get_current_pid_tgid(); 260 261 struct sock **skpp; 262 skpp = connectsock.lookup(&pid); 263 if (skpp == 0) { 264 return 0; // missed entry 265 } 266 267 connectsock.delete(&pid); 268 269 if (ret != 0) { 270 // failed to send SYNC packet, may not have populated 271 // socket __sk_common.{skc_rcv_saddr, ...} 272 return 0; 273 } 274 275 // pull in details 276 struct sock *skp = *skpp; 277 struct ipv6_tuple_t t = { }; 278 if (!read_ipv6_tuple(&t, skp)) { 279 return 0; 280 } 281 282 struct pid_comm_t p = { }; 283 p.pid = pid; 284 bpf_get_current_comm(&p.comm, sizeof(p.comm)); 285 286 tuplepid_ipv6.update(&t, &p); 287 288 return 0; 289} 290 291int trace_tcp_set_state_entry(struct pt_regs *ctx, struct sock *skp, int state) 292{ 293 if (state != TCP_ESTABLISHED && state != TCP_CLOSE) { 294 return 0; 295 } 296 297 u16 family = skp->__sk_common.skc_family; 298 ##FILTER_FAMILY## 299 300 u8 ipver = 0; 301 if (check_family(skp, AF_INET)) { 302 ipver = 4; 303 struct ipv4_tuple_t t = { }; 304 if (!read_ipv4_tuple(&t, skp)) { 305 return 0; 306 } 307 308 if (state == TCP_CLOSE) { 309 tuplepid_ipv4.delete(&t); 310 return 0; 311 } 312 313 struct pid_comm_t *p; 314 p = tuplepid_ipv4.lookup(&t); 315 if (p == 0) { 316 return 0; // missed entry 317 } 318 319 struct tcp_ipv4_event_t evt4 = { }; 320 evt4.ts_ns = bpf_ktime_get_ns(); 321 evt4.type = TCP_EVENT_TYPE_CONNECT; 322 evt4.pid = p->pid >> 32; 323 evt4.ip = ipver; 324 evt4.saddr = t.saddr; 325 evt4.daddr = t.daddr; 326 evt4.sport = ntohs(t.sport); 327 evt4.dport = ntohs(t.dport); 328 evt4.netns = t.netns; 329 330 int i; 331 for (i = 0; i < TASK_COMM_LEN; i++) { 332 evt4.comm[i] = p->comm[i]; 333 } 334 335 tcp_ipv4_event.perf_submit(ctx, &evt4, sizeof(evt4)); 336 tuplepid_ipv4.delete(&t); 337 } else if (check_family(skp, AF_INET6)) { 338 ipver = 6; 339 struct ipv6_tuple_t t = { }; 340 if (!read_ipv6_tuple(&t, skp)) { 341 return 0; 342 } 343 344 if (state == TCP_CLOSE) { 345 tuplepid_ipv6.delete(&t); 346 return 0; 347 } 348 349 struct pid_comm_t *p; 350 p = tuplepid_ipv6.lookup(&t); 351 if (p == 0) { 352 return 0; // missed entry 353 } 354 355 struct tcp_ipv6_event_t evt6 = { }; 356 evt6.ts_ns = bpf_ktime_get_ns(); 357 evt6.type = TCP_EVENT_TYPE_CONNECT; 358 evt6.pid = p->pid >> 32; 359 evt6.ip = ipver; 360 evt6.saddr = t.saddr; 361 evt6.daddr = t.daddr; 362 evt6.sport = ntohs(t.sport); 363 evt6.dport = ntohs(t.dport); 364 evt6.netns = t.netns; 365 366 int i; 367 for (i = 0; i < TASK_COMM_LEN; i++) { 368 evt6.comm[i] = p->comm[i]; 369 } 370 371 tcp_ipv6_event.perf_submit(ctx, &evt6, sizeof(evt6)); 372 tuplepid_ipv6.delete(&t); 373 } 374 // else drop 375 376 return 0; 377} 378 379int trace_close_entry(struct pt_regs *ctx, struct sock *skp) 380{ 381 if (container_should_be_filtered()) { 382 return 0; 383 } 384 385 u64 pid = bpf_get_current_pid_tgid(); 386 387 ##FILTER_PID## 388 389 u16 family = skp->__sk_common.skc_family; 390 ##FILTER_FAMILY## 391 392 u8 oldstate = skp->sk_state; 393 // Don't generate close events for connections that were never 394 // established in the first place. 395 if (oldstate == TCP_SYN_SENT || 396 oldstate == TCP_SYN_RECV || 397 oldstate == TCP_NEW_SYN_RECV) 398 return 0; 399 400 u8 ipver = 0; 401 if (check_family(skp, AF_INET)) { 402 ipver = 4; 403 struct ipv4_tuple_t t = { }; 404 if (!read_ipv4_tuple(&t, skp)) { 405 return 0; 406 } 407 408 struct tcp_ipv4_event_t evt4 = { }; 409 evt4.ts_ns = bpf_ktime_get_ns(); 410 evt4.type = TCP_EVENT_TYPE_CLOSE; 411 evt4.pid = pid >> 32; 412 evt4.ip = ipver; 413 evt4.saddr = t.saddr; 414 evt4.daddr = t.daddr; 415 evt4.sport = ntohs(t.sport); 416 evt4.dport = ntohs(t.dport); 417 evt4.netns = t.netns; 418 bpf_get_current_comm(&evt4.comm, sizeof(evt4.comm)); 419 420 tcp_ipv4_event.perf_submit(ctx, &evt4, sizeof(evt4)); 421 } else if (check_family(skp, AF_INET6)) { 422 ipver = 6; 423 struct ipv6_tuple_t t = { }; 424 if (!read_ipv6_tuple(&t, skp)) { 425 return 0; 426 } 427 428 struct tcp_ipv6_event_t evt6 = { }; 429 evt6.ts_ns = bpf_ktime_get_ns(); 430 evt6.type = TCP_EVENT_TYPE_CLOSE; 431 evt6.pid = pid >> 32; 432 evt6.ip = ipver; 433 evt6.saddr = t.saddr; 434 evt6.daddr = t.daddr; 435 evt6.sport = ntohs(t.sport); 436 evt6.dport = ntohs(t.dport); 437 evt6.netns = t.netns; 438 bpf_get_current_comm(&evt6.comm, sizeof(evt6.comm)); 439 440 tcp_ipv6_event.perf_submit(ctx, &evt6, sizeof(evt6)); 441 } 442 // else drop 443 444 return 0; 445}; 446 447int trace_accept_return(struct pt_regs *ctx) 448{ 449 if (container_should_be_filtered()) { 450 return 0; 451 } 452 453 struct sock *newsk = (struct sock *)PT_REGS_RC(ctx); 454 u64 pid = bpf_get_current_pid_tgid(); 455 456 ##FILTER_PID## 457 458 if (newsk == NULL) { 459 return 0; 460 } 461 462 // pull in details 463 u16 lport = 0, dport = 0; 464 u32 net_ns_inum = 0; 465 u8 ipver = 0; 466 467 dport = newsk->__sk_common.skc_dport; 468 lport = newsk->__sk_common.skc_num; 469 470 // Get network namespace id, if kernel supports it 471#ifdef CONFIG_NET_NS 472 net_ns_inum = newsk->__sk_common.skc_net.net->ns.inum; 473#endif 474 475 ##FILTER_NETNS## 476 477 u16 family = newsk->__sk_common.skc_family; 478 ##FILTER_FAMILY## 479 480 if (check_family(newsk, AF_INET)) { 481 ipver = 4; 482 483 struct tcp_ipv4_event_t evt4 = { 0 }; 484 485 evt4.ts_ns = bpf_ktime_get_ns(); 486 evt4.type = TCP_EVENT_TYPE_ACCEPT; 487 evt4.netns = net_ns_inum; 488 evt4.pid = pid >> 32; 489 evt4.ip = ipver; 490 491 evt4.saddr = newsk->__sk_common.skc_rcv_saddr; 492 evt4.daddr = newsk->__sk_common.skc_daddr; 493 494 evt4.sport = lport; 495 evt4.dport = ntohs(dport); 496 bpf_get_current_comm(&evt4.comm, sizeof(evt4.comm)); 497 498 // do not send event if IP address is 0.0.0.0 or port is 0 499 if (evt4.saddr != 0 && evt4.daddr != 0 && 500 evt4.sport != 0 && evt4.dport != 0) { 501 tcp_ipv4_event.perf_submit(ctx, &evt4, sizeof(evt4)); 502 } 503 } else if (check_family(newsk, AF_INET6)) { 504 ipver = 6; 505 506 struct tcp_ipv6_event_t evt6 = { 0 }; 507 508 evt6.ts_ns = bpf_ktime_get_ns(); 509 evt6.type = TCP_EVENT_TYPE_ACCEPT; 510 evt6.netns = net_ns_inum; 511 evt6.pid = pid >> 32; 512 evt6.ip = ipver; 513 514 bpf_probe_read_kernel(&evt6.saddr, sizeof(evt6.saddr), 515 newsk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32); 516 bpf_probe_read_kernel(&evt6.daddr, sizeof(evt6.daddr), 517 newsk->__sk_common.skc_v6_daddr.in6_u.u6_addr32); 518 519 evt6.sport = lport; 520 evt6.dport = ntohs(dport); 521 bpf_get_current_comm(&evt6.comm, sizeof(evt6.comm)); 522 523 // do not send event if IP address is 0.0.0.0 or port is 0 524 if (evt6.saddr != 0 && evt6.daddr != 0 && 525 evt6.sport != 0 && evt6.dport != 0) { 526 tcp_ipv6_event.perf_submit(ctx, &evt6, sizeof(evt6)); 527 } 528 } 529 // else drop 530 531 return 0; 532} 533""" 534 535verbose_types = {"C": "connect", "A": "accept", 536 "X": "close", "U": "unknown"} 537 538 539def print_ipv4_event(cpu, data, size): 540 event = b["tcp_ipv4_event"].event(data) 541 global start_ts 542 543 if args.timestamp: 544 if start_ts == 0: 545 start_ts = event.ts_ns 546 if args.verbose: 547 print("%-14d" % (event.ts_ns - start_ts), end="") 548 else: 549 print("%-9.3f" % ((event.ts_ns - start_ts) / 1000000000.0), end="") 550 if event.type == 1: 551 type_str = "C" 552 elif event.type == 2: 553 type_str = "A" 554 elif event.type == 3: 555 type_str = "X" 556 else: 557 type_str = "U" 558 559 if args.verbose: 560 print("%-12s " % (verbose_types[type_str]), end="") 561 else: 562 print("%-2s " % (type_str), end="") 563 564 print("%-6d %-16s %-2d %-16s %-16s %-6d %-6d" % 565 (event.pid, event.comm.decode('utf-8', 'replace'), 566 event.ip, 567 inet_ntop(AF_INET, pack("I", event.saddr)), 568 inet_ntop(AF_INET, pack("I", event.daddr)), 569 event.sport, 570 event.dport), end="") 571 if args.verbose and not args.netns: 572 print(" %-8d" % event.netns) 573 else: 574 print() 575 576 577def print_ipv6_event(cpu, data, size): 578 event = b["tcp_ipv6_event"].event(data) 579 global start_ts 580 if args.timestamp: 581 if start_ts == 0: 582 start_ts = event.ts_ns 583 if args.verbose: 584 print("%-14d" % (event.ts_ns - start_ts), end="") 585 else: 586 print("%-9.3f" % ((event.ts_ns - start_ts) / 1000000000.0), end="") 587 if event.type == 1: 588 type_str = "C" 589 elif event.type == 2: 590 type_str = "A" 591 elif event.type == 3: 592 type_str = "X" 593 else: 594 type_str = "U" 595 596 if args.verbose: 597 print("%-12s " % (verbose_types[type_str]), end="") 598 else: 599 print("%-2s " % (type_str), end="") 600 601 print("%-6d %-16s %-2d %-16s %-16s %-6d %-6d" % 602 (event.pid, event.comm.decode('utf-8', 'replace'), 603 event.ip, 604 "[" + inet_ntop(AF_INET6, event.saddr) + "]", 605 "[" + inet_ntop(AF_INET6, event.daddr) + "]", 606 event.sport, 607 event.dport), end="") 608 if args.verbose and not args.netns: 609 print(" %-8d" % event.netns) 610 else: 611 print() 612 613 614pid_filter = "" 615netns_filter = "" 616 617if args.pid: 618 pid_filter = 'if (pid >> 32 != %d) { return 0; }' % args.pid 619if args.netns: 620 netns_filter = 'if (net_ns_inum != %d) { return 0; }' % args.netns 621if args.ipv4: 622 bpf_text = bpf_text.replace('##FILTER_FAMILY##', 623 'if (family != AF_INET) { return 0; }') 624elif args.ipv6: 625 bpf_text = bpf_text.replace('##FILTER_FAMILY##', 626 'if (family != AF_INET6) { return 0; }') 627bpf_text = bpf_text.replace('##FILTER_FAMILY##', '') 628bpf_text = bpf_text.replace('##FILTER_PID##', pid_filter) 629bpf_text = bpf_text.replace('##FILTER_NETNS##', netns_filter) 630bpf_text = filter_by_containers(args) + bpf_text 631 632if args.ebpf: 633 print(bpf_text) 634 exit() 635 636# initialize BPF 637b = BPF(text=bpf_text) 638if args.ipv4: 639 b.attach_kprobe(event="tcp_v4_connect", fn_name="trace_connect_v4_entry") 640 b.attach_kretprobe(event="tcp_v4_connect", fn_name="trace_connect_v4_return") 641elif args.ipv6: 642 b.attach_kprobe(event="tcp_v6_connect", fn_name="trace_connect_v6_entry") 643 b.attach_kretprobe(event="tcp_v6_connect", fn_name="trace_connect_v6_return") 644else: 645 b.attach_kprobe(event="tcp_v4_connect", fn_name="trace_connect_v4_entry") 646 b.attach_kretprobe(event="tcp_v4_connect", fn_name="trace_connect_v4_return") 647 b.attach_kprobe(event="tcp_v6_connect", fn_name="trace_connect_v6_entry") 648 b.attach_kretprobe(event="tcp_v6_connect", fn_name="trace_connect_v6_return") 649b.attach_kprobe(event="tcp_set_state", fn_name="trace_tcp_set_state_entry") 650b.attach_kprobe(event="tcp_close", fn_name="trace_close_entry") 651b.attach_kretprobe(event="inet_csk_accept", fn_name="trace_accept_return") 652 653print("Tracing TCP established connections. Ctrl-C to end.") 654 655# header 656if args.verbose: 657 if args.timestamp: 658 print("%-14s" % ("TIME(ns)"), end="") 659 print("%-12s %-6s %-16s %-2s %-16s %-16s %-6s %-7s" % ("TYPE", 660 "PID", "COMM", "IP", "SADDR", "DADDR", "SPORT", "DPORT"), end="") 661 if not args.netns: 662 print("%-8s" % "NETNS", end="") 663 print() 664else: 665 if args.timestamp: 666 print("%-9s" % ("TIME(s)"), end="") 667 print("%-2s %-6s %-16s %-2s %-16s %-16s %-6s %-6s" % 668 ("T", "PID", "COMM", "IP", "SADDR", "DADDR", "SPORT", "DPORT")) 669 670start_ts = 0 671 672 673b["tcp_ipv4_event"].open_perf_buffer(print_ipv4_event) 674b["tcp_ipv6_event"].open_perf_buffer(print_ipv6_event) 675while True: 676 try: 677 b.perf_buffer_poll() 678 except KeyboardInterrupt: 679 exit() 680