xref: /aosp_15_r20/external/bcc/tools/tcptracer.py (revision 387f9dfdfa2baef462e92476d413c7bc2470293e)
#!/usr/bin/env python
#
# tcptracer     Trace TCP connections (connect, accept, close).
#               For Linux, uses BCC, eBPF. Embedded C.
#
# USAGE: tcptracer [-h] [-t] [-p PID] [-N NETNS] [--cgroupmap CGROUPMAP]
#                  [--mntnsmap MNTNSMAP] [-4 | -6] [-v]
#
# You should generally try to avoid writing long scripts that measure multiple
# functions and walk multiple kernel structures, as they will be a burden to
# maintain as the kernel changes.
# The following code should be replaced, and simplified, when static TCP probes
# exist.
#
# Copyright 2017-2020 Kinvolk GmbH
#
# Licensed under the Apache License, Version 2.0 (the "License")
17from __future__ import print_function
18from bcc import BPF
19from bcc.containers import filter_by_containers
20
21import argparse as ap
22from socket import inet_ntop, AF_INET, AF_INET6
23from struct import pack
24
# Command-line interface. Arguments are parsed at module level because the
# BPF program text further down is specialised from the resulting `args`.
parser = ap.ArgumentParser(
    description="Trace TCP connections",
    formatter_class=ap.RawDescriptionHelpFormatter)
parser.add_argument("-t", "--timestamp", action="store_true",
                    help="include timestamp on output")
parser.add_argument("-p", "--pid", default=0, type=int,
                    help="trace this PID only")
parser.add_argument("-N", "--netns", default=0, type=int,
                    help="trace this Network Namespace only")
parser.add_argument("--cgroupmap",
                    help="trace cgroups in this BPF map only")
parser.add_argument("--mntnsmap",
                    help="trace mount namespaces in this BPF map only")

# -4 and -6 are address-family filters and cannot be combined.
group = parser.add_mutually_exclusive_group()
group.add_argument("-4", "--ipv4", action="store_true",
                   help="trace IPv4 family only")
group.add_argument("-6", "--ipv6", action="store_true",
                   help="trace IPv6 family only")

parser.add_argument("-v", "--verbose", action="store_true",
                    help="include Network Namespace in the output")
# Hidden from --help: print the generated BPF C text instead of tracing.
parser.add_argument("--ebpf", action="store_true",
                    help=ap.SUPPRESS)
args = parser.parse_args()

48bpf_text = """
49#include <uapi/linux/ptrace.h>
50#pragma clang diagnostic push
51#pragma clang diagnostic ignored "-Wtautological-compare"
52#include <net/sock.h>
53#pragma clang diagnostic pop
54#include <net/inet_sock.h>
55#include <net/net_namespace.h>
56#include <bcc/proto.h>
57
58#define TCP_EVENT_TYPE_CONNECT 1
59#define TCP_EVENT_TYPE_ACCEPT  2
60#define TCP_EVENT_TYPE_CLOSE   3
61
62struct tcp_ipv4_event_t {
63    u64 ts_ns;
64    u32 type;
65    u32 pid;
66    char comm[TASK_COMM_LEN];
67    u8 ip;
68    u32 saddr;
69    u32 daddr;
70    u16 sport;
71    u16 dport;
72    u32 netns;
73};
74BPF_PERF_OUTPUT(tcp_ipv4_event);
75
76struct tcp_ipv6_event_t {
77    u64 ts_ns;
78    u32 type;
79    u32 pid;
80    char comm[TASK_COMM_LEN];
81    unsigned __int128 saddr;
82    unsigned __int128 daddr;
83    u16 sport;
84    u16 dport;
85    u32 netns;
86    u8 ip;
87};
88BPF_PERF_OUTPUT(tcp_ipv6_event);
89
90// tcp_set_state doesn't run in the context of the process that initiated the
91// connection so we need to store a map TUPLE -> PID to send the right PID on
92// the event
93struct ipv4_tuple_t {
94    u32 saddr;
95    u32 daddr;
96    u16 sport;
97    u16 dport;
98    u32 netns;
99};
100
101struct ipv6_tuple_t {
102    unsigned __int128 saddr;
103    unsigned __int128 daddr;
104    u16 sport;
105    u16 dport;
106    u32 netns;
107};
108
109struct pid_comm_t {
110    u64 pid;
111    char comm[TASK_COMM_LEN];
112};
113
114BPF_HASH(tuplepid_ipv4, struct ipv4_tuple_t, struct pid_comm_t);
115BPF_HASH(tuplepid_ipv6, struct ipv6_tuple_t, struct pid_comm_t);
116
117BPF_HASH(connectsock, u64, struct sock *);
118
119static int read_ipv4_tuple(struct ipv4_tuple_t *tuple, struct sock *skp)
120{
121  u32 net_ns_inum = 0;
122  u32 saddr = skp->__sk_common.skc_rcv_saddr;
123  u32 daddr = skp->__sk_common.skc_daddr;
124  struct inet_sock *sockp = (struct inet_sock *)skp;
125  u16 sport = sockp->inet_sport;
126  u16 dport = skp->__sk_common.skc_dport;
127#ifdef CONFIG_NET_NS
128  net_ns_inum = skp->__sk_common.skc_net.net->ns.inum;
129#endif
130
131  ##FILTER_NETNS##
132
133  tuple->saddr = saddr;
134  tuple->daddr = daddr;
135  tuple->sport = sport;
136  tuple->dport = dport;
137  tuple->netns = net_ns_inum;
138
139  // if addresses or ports are 0, ignore
140  if (saddr == 0 || daddr == 0 || sport == 0 || dport == 0) {
141      return 0;
142  }
143
144  return 1;
145}
146
147static int read_ipv6_tuple(struct ipv6_tuple_t *tuple, struct sock *skp)
148{
149  u32 net_ns_inum = 0;
150  unsigned __int128 saddr = 0, daddr = 0;
151  struct inet_sock *sockp = (struct inet_sock *)skp;
152  u16 sport = sockp->inet_sport;
153  u16 dport = skp->__sk_common.skc_dport;
154#ifdef CONFIG_NET_NS
155  net_ns_inum = skp->__sk_common.skc_net.net->ns.inum;
156#endif
157  bpf_probe_read_kernel(&saddr, sizeof(saddr),
158                 skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
159  bpf_probe_read_kernel(&daddr, sizeof(daddr),
160                 skp->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
161
162  ##FILTER_NETNS##
163
164  tuple->saddr = saddr;
165  tuple->daddr = daddr;
166  tuple->sport = sport;
167  tuple->dport = dport;
168  tuple->netns = net_ns_inum;
169
170  // if addresses or ports are 0, ignore
171  if (saddr == 0 || daddr == 0 || sport == 0 || dport == 0) {
172      return 0;
173  }
174
175  return 1;
176}
177
178static bool check_family(struct sock *sk, u16 expected_family) {
179  u64 zero = 0;
180  u16 family = sk->__sk_common.skc_family;
181  return family == expected_family;
182}
183
184int trace_connect_v4_entry(struct pt_regs *ctx, struct sock *sk)
185{
186  if (container_should_be_filtered()) {
187    return 0;
188  }
189
190  u64 pid = bpf_get_current_pid_tgid();
191
192  ##FILTER_PID##
193
194  u16 family = sk->__sk_common.skc_family;
195  ##FILTER_FAMILY##
196
197
198  // stash the sock ptr for lookup on return
199  connectsock.update(&pid, &sk);
200
201  return 0;
202}
203
204int trace_connect_v4_return(struct pt_regs *ctx)
205{
206  int ret = PT_REGS_RC(ctx);
207  u64 pid = bpf_get_current_pid_tgid();
208
209  struct sock **skpp;
210  skpp = connectsock.lookup(&pid);
211  if (skpp == 0) {
212      return 0;       // missed entry
213  }
214
215  connectsock.delete(&pid);
216
217  if (ret != 0) {
218      // failed to send SYNC packet, may not have populated
219      // socket __sk_common.{skc_rcv_saddr, ...}
220      return 0;
221  }
222
223  // pull in details
224  struct sock *skp = *skpp;
225  struct ipv4_tuple_t t = { };
226  if (!read_ipv4_tuple(&t, skp)) {
227      return 0;
228  }
229
230  struct pid_comm_t p = { };
231  p.pid = pid;
232  bpf_get_current_comm(&p.comm, sizeof(p.comm));
233
234  tuplepid_ipv4.update(&t, &p);
235
236  return 0;
237}
238
239int trace_connect_v6_entry(struct pt_regs *ctx, struct sock *sk)
240{
241  if (container_should_be_filtered()) {
242    return 0;
243  }
244  u64 pid = bpf_get_current_pid_tgid();
245
246  ##FILTER_PID##
247  u16 family = sk->__sk_common.skc_family;
248  ##FILTER_FAMILY##
249
250  // stash the sock ptr for lookup on return
251  connectsock.update(&pid, &sk);
252
253  return 0;
254}
255
256int trace_connect_v6_return(struct pt_regs *ctx)
257{
258  int ret = PT_REGS_RC(ctx);
259  u64 pid = bpf_get_current_pid_tgid();
260
261  struct sock **skpp;
262  skpp = connectsock.lookup(&pid);
263  if (skpp == 0) {
264      return 0;       // missed entry
265  }
266
267  connectsock.delete(&pid);
268
269  if (ret != 0) {
270      // failed to send SYNC packet, may not have populated
271      // socket __sk_common.{skc_rcv_saddr, ...}
272      return 0;
273  }
274
275  // pull in details
276  struct sock *skp = *skpp;
277  struct ipv6_tuple_t t = { };
278  if (!read_ipv6_tuple(&t, skp)) {
279      return 0;
280  }
281
282  struct pid_comm_t p = { };
283  p.pid = pid;
284  bpf_get_current_comm(&p.comm, sizeof(p.comm));
285
286  tuplepid_ipv6.update(&t, &p);
287
288  return 0;
289}
290
291int trace_tcp_set_state_entry(struct pt_regs *ctx, struct sock *skp, int state)
292{
293  if (state != TCP_ESTABLISHED && state != TCP_CLOSE) {
294      return 0;
295  }
296
297  u16 family = skp->__sk_common.skc_family;
298  ##FILTER_FAMILY##
299
300  u8 ipver = 0;
301  if (check_family(skp, AF_INET)) {
302      ipver = 4;
303      struct ipv4_tuple_t t = { };
304      if (!read_ipv4_tuple(&t, skp)) {
305          return 0;
306      }
307
308      if (state == TCP_CLOSE) {
309          tuplepid_ipv4.delete(&t);
310          return 0;
311      }
312
313      struct pid_comm_t *p;
314      p = tuplepid_ipv4.lookup(&t);
315      if (p == 0) {
316          return 0;       // missed entry
317      }
318
319      struct tcp_ipv4_event_t evt4 = { };
320      evt4.ts_ns = bpf_ktime_get_ns();
321      evt4.type = TCP_EVENT_TYPE_CONNECT;
322      evt4.pid = p->pid >> 32;
323      evt4.ip = ipver;
324      evt4.saddr = t.saddr;
325      evt4.daddr = t.daddr;
326      evt4.sport = ntohs(t.sport);
327      evt4.dport = ntohs(t.dport);
328      evt4.netns = t.netns;
329
330      int i;
331      for (i = 0; i < TASK_COMM_LEN; i++) {
332          evt4.comm[i] = p->comm[i];
333      }
334
335      tcp_ipv4_event.perf_submit(ctx, &evt4, sizeof(evt4));
336      tuplepid_ipv4.delete(&t);
337  } else if (check_family(skp, AF_INET6)) {
338      ipver = 6;
339      struct ipv6_tuple_t t = { };
340      if (!read_ipv6_tuple(&t, skp)) {
341          return 0;
342      }
343
344      if (state == TCP_CLOSE) {
345          tuplepid_ipv6.delete(&t);
346          return 0;
347      }
348
349      struct pid_comm_t *p;
350      p = tuplepid_ipv6.lookup(&t);
351      if (p == 0) {
352          return 0;       // missed entry
353      }
354
355      struct tcp_ipv6_event_t evt6 = { };
356      evt6.ts_ns = bpf_ktime_get_ns();
357      evt6.type = TCP_EVENT_TYPE_CONNECT;
358      evt6.pid = p->pid >> 32;
359      evt6.ip = ipver;
360      evt6.saddr = t.saddr;
361      evt6.daddr = t.daddr;
362      evt6.sport = ntohs(t.sport);
363      evt6.dport = ntohs(t.dport);
364      evt6.netns = t.netns;
365
366      int i;
367      for (i = 0; i < TASK_COMM_LEN; i++) {
368          evt6.comm[i] = p->comm[i];
369      }
370
371      tcp_ipv6_event.perf_submit(ctx, &evt6, sizeof(evt6));
372      tuplepid_ipv6.delete(&t);
373  }
374  // else drop
375
376  return 0;
377}
378
379int trace_close_entry(struct pt_regs *ctx, struct sock *skp)
380{
381  if (container_should_be_filtered()) {
382    return 0;
383  }
384
385  u64 pid = bpf_get_current_pid_tgid();
386
387  ##FILTER_PID##
388
389  u16 family = skp->__sk_common.skc_family;
390  ##FILTER_FAMILY##
391
392  u8 oldstate = skp->sk_state;
393  // Don't generate close events for connections that were never
394  // established in the first place.
395  if (oldstate == TCP_SYN_SENT ||
396      oldstate == TCP_SYN_RECV ||
397      oldstate == TCP_NEW_SYN_RECV)
398      return 0;
399
400  u8 ipver = 0;
401  if (check_family(skp, AF_INET)) {
402      ipver = 4;
403      struct ipv4_tuple_t t = { };
404      if (!read_ipv4_tuple(&t, skp)) {
405          return 0;
406      }
407
408      struct tcp_ipv4_event_t evt4 = { };
409      evt4.ts_ns = bpf_ktime_get_ns();
410      evt4.type = TCP_EVENT_TYPE_CLOSE;
411      evt4.pid = pid >> 32;
412      evt4.ip = ipver;
413      evt4.saddr = t.saddr;
414      evt4.daddr = t.daddr;
415      evt4.sport = ntohs(t.sport);
416      evt4.dport = ntohs(t.dport);
417      evt4.netns = t.netns;
418      bpf_get_current_comm(&evt4.comm, sizeof(evt4.comm));
419
420      tcp_ipv4_event.perf_submit(ctx, &evt4, sizeof(evt4));
421  } else if (check_family(skp, AF_INET6)) {
422      ipver = 6;
423      struct ipv6_tuple_t t = { };
424      if (!read_ipv6_tuple(&t, skp)) {
425          return 0;
426      }
427
428      struct tcp_ipv6_event_t evt6 = { };
429      evt6.ts_ns = bpf_ktime_get_ns();
430      evt6.type = TCP_EVENT_TYPE_CLOSE;
431      evt6.pid = pid >> 32;
432      evt6.ip = ipver;
433      evt6.saddr = t.saddr;
434      evt6.daddr = t.daddr;
435      evt6.sport = ntohs(t.sport);
436      evt6.dport = ntohs(t.dport);
437      evt6.netns = t.netns;
438      bpf_get_current_comm(&evt6.comm, sizeof(evt6.comm));
439
440      tcp_ipv6_event.perf_submit(ctx, &evt6, sizeof(evt6));
441  }
442  // else drop
443
444  return 0;
445};
446
447int trace_accept_return(struct pt_regs *ctx)
448{
449  if (container_should_be_filtered()) {
450    return 0;
451  }
452
453  struct sock *newsk = (struct sock *)PT_REGS_RC(ctx);
454  u64 pid = bpf_get_current_pid_tgid();
455
456  ##FILTER_PID##
457
458  if (newsk == NULL) {
459      return 0;
460  }
461
462  // pull in details
463  u16 lport = 0, dport = 0;
464  u32 net_ns_inum = 0;
465  u8 ipver = 0;
466
467  dport = newsk->__sk_common.skc_dport;
468  lport = newsk->__sk_common.skc_num;
469
470  // Get network namespace id, if kernel supports it
471#ifdef CONFIG_NET_NS
472  net_ns_inum = newsk->__sk_common.skc_net.net->ns.inum;
473#endif
474
475  ##FILTER_NETNS##
476
477  u16 family = newsk->__sk_common.skc_family;
478  ##FILTER_FAMILY##
479
480  if (check_family(newsk, AF_INET)) {
481      ipver = 4;
482
483      struct tcp_ipv4_event_t evt4 = { 0 };
484
485      evt4.ts_ns = bpf_ktime_get_ns();
486      evt4.type = TCP_EVENT_TYPE_ACCEPT;
487      evt4.netns = net_ns_inum;
488      evt4.pid = pid >> 32;
489      evt4.ip = ipver;
490
491      evt4.saddr = newsk->__sk_common.skc_rcv_saddr;
492      evt4.daddr = newsk->__sk_common.skc_daddr;
493
494      evt4.sport = lport;
495      evt4.dport = ntohs(dport);
496      bpf_get_current_comm(&evt4.comm, sizeof(evt4.comm));
497
498      // do not send event if IP address is 0.0.0.0 or port is 0
499      if (evt4.saddr != 0 && evt4.daddr != 0 &&
500          evt4.sport != 0 && evt4.dport != 0) {
501          tcp_ipv4_event.perf_submit(ctx, &evt4, sizeof(evt4));
502      }
503  } else if (check_family(newsk, AF_INET6)) {
504      ipver = 6;
505
506      struct tcp_ipv6_event_t evt6 = { 0 };
507
508      evt6.ts_ns = bpf_ktime_get_ns();
509      evt6.type = TCP_EVENT_TYPE_ACCEPT;
510      evt6.netns = net_ns_inum;
511      evt6.pid = pid >> 32;
512      evt6.ip = ipver;
513
514      bpf_probe_read_kernel(&evt6.saddr, sizeof(evt6.saddr),
515                     newsk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
516      bpf_probe_read_kernel(&evt6.daddr, sizeof(evt6.daddr),
517                     newsk->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
518
519      evt6.sport = lport;
520      evt6.dport = ntohs(dport);
521      bpf_get_current_comm(&evt6.comm, sizeof(evt6.comm));
522
523      // do not send event if IP address is 0.0.0.0 or port is 0
524      if (evt6.saddr != 0 && evt6.daddr != 0 &&
525          evt6.sport != 0 && evt6.dport != 0) {
526          tcp_ipv6_event.perf_submit(ctx, &evt6, sizeof(evt6));
527      }
528  }
529  // else drop
530
531  return 0;
532}
533"""
534
# Long-form names shown in the TYPE column when -v/--verbose is given.
verbose_types = {"C": "connect", "A": "accept",
                 "X": "close", "U": "unknown"}

# Numeric event type set by the BPF program -> one-letter code used in the
# non-verbose T column. Any other value is shown as "U".
_TYPE_CODES = {1: "C", 2: "A", 3: "X"}


def _print_event(event, saddr, daddr):
    """Print one output row for a decoded perf event.

    Shared by the IPv4 and IPv6 callbacks, which differ only in how they
    render the addresses.

    event -- decoded event object (fields ts_ns, type, pid, comm, ip,
             sport, dport, netns are read)
    saddr -- source address, already formatted as text
    daddr -- destination address, already formatted as text
    """
    global start_ts

    if args.timestamp:
        # The first event printed becomes time zero for every later row.
        if start_ts == 0:
            start_ts = event.ts_ns
        elapsed_ns = event.ts_ns - start_ts
        if args.verbose:
            print("%-14d" % elapsed_ns, end="")
        else:
            print("%-9.3f" % (elapsed_ns / 1000000000.0), end="")

    type_str = _TYPE_CODES.get(event.type, "U")
    if args.verbose:
        print("%-12s " % (verbose_types[type_str]), end="")
    else:
        print("%-2s " % (type_str), end="")

    print("%-6d %-16s %-2d %-16s %-16s %-6d %-6d" %
          (event.pid, event.comm.decode('utf-8', 'replace'),
           event.ip,
           saddr,
           daddr,
           event.sport,
           event.dport), end="")
    # The NETNS column is only shown in verbose mode, and is dropped when a
    # single namespace was selected with -N.
    if args.verbose and not args.netns:
        print(" %-8d" % event.netns)
    else:
        print()


def print_ipv4_event(cpu, data, size):
    """Perf-buffer callback: decode one IPv4 event and print it.

    cpu and size are supplied by the perf-buffer API and are not used.
    """
    event = b["tcp_ipv4_event"].event(data)
    # The addresses arrive as raw u32 values; repacking them with the native
    # layout yields the 4 bytes inet_ntop expects.
    _print_event(event,
                 inet_ntop(AF_INET, pack("I", event.saddr)),
                 inet_ntop(AF_INET, pack("I", event.daddr)))


def print_ipv6_event(cpu, data, size):
    """Perf-buffer callback: decode one IPv6 event and print it.

    cpu and size are supplied by the perf-buffer API and are not used.
    Addresses are printed in square brackets.
    """
    event = b["tcp_ipv6_event"].event(data)
    _print_event(event,
                 "[" + inet_ntop(AF_INET6, event.saddr) + "]",
                 "[" + inet_ntop(AF_INET6, event.daddr) + "]")


# Specialise the BPF program: each FILTER placeholder in bpf_text is replaced
# by a C guard statement, or by nothing when the option was not given.
pid_filter = ""
netns_filter = ""
family_filter = ""

if args.pid:
    pid_filter = 'if (pid >> 32 != %d) { return 0; }' % args.pid
if args.netns:
    netns_filter = 'if (net_ns_inum != %d) { return 0; }' % args.netns
if args.ipv4:
    family_filter = 'if (family != AF_INET) { return 0; }'
elif args.ipv6:
    family_filter = 'if (family != AF_INET6) { return 0; }'

bpf_text = bpf_text.replace('##FILTER_FAMILY##', family_filter)
bpf_text = bpf_text.replace('##FILTER_PID##', pid_filter)
bpf_text = bpf_text.replace('##FILTER_NETNS##', netns_filter)
# Prepend the container filter helper used by the probes
# (container_should_be_filtered).
bpf_text = filter_by_containers(args) + bpf_text

# --ebpf: show the final program text and stop before loading anything.
if args.ebpf:
    print(bpf_text)
    exit()

# initialize BPF
b = BPF(text=bpf_text)

# Attach the connect probes for each address family that was not excluded.
# -4 and -6 are mutually exclusive, so with neither given both families are
# attached, IPv4 first.
if not args.ipv6:
    b.attach_kprobe(event="tcp_v4_connect", fn_name="trace_connect_v4_entry")
    b.attach_kretprobe(event="tcp_v4_connect", fn_name="trace_connect_v4_return")
if not args.ipv4:
    b.attach_kprobe(event="tcp_v6_connect", fn_name="trace_connect_v6_entry")
    b.attach_kretprobe(event="tcp_v6_connect", fn_name="trace_connect_v6_return")

# These probes serve both families; family filtering happens inside the BPF
# program.
b.attach_kprobe(event="tcp_set_state", fn_name="trace_tcp_set_state_entry")
b.attach_kprobe(event="tcp_close", fn_name="trace_close_entry")
b.attach_kretprobe(event="inet_csk_accept", fn_name="trace_accept_return")

print("Tracing TCP established connections. Ctrl-C to end.")

# header
# Column widths mirror the row formats used by the event printers.
if args.verbose:
    if args.timestamp:
        print("%-14s" % ("TIME(ns)"), end="")
    header = "%-12s %-6s %-16s %-2s %-16s %-16s %-6s %-7s" % (
        "TYPE", "PID", "COMM", "IP", "SADDR", "DADDR", "SPORT", "DPORT")
    # NETNS is omitted when a single namespace was selected with -N.
    if not args.netns:
        header += "%-8s" % "NETNS"
    print(header)
else:
    if args.timestamp:
        print("%-9s" % ("TIME(s)"), end="")
    print("%-2s %-6s %-16s %-2s %-16s %-16s %-6s %-6s" %
          ("T", "PID", "COMM", "IP", "SADDR", "DADDR", "SPORT", "DPORT"))

# Timestamp of the first printed event; 0 means "no event seen yet". The
# event printers set it and report later timestamps relative to it.
start_ts = 0


# Register one callback per perf output, then poll until interrupted.
b["tcp_ipv4_event"].open_perf_buffer(print_ipv4_event)
b["tcp_ipv6_event"].open_perf_buffer(print_ipv6_event)
while True:
    try:
        b.perf_buffer_poll()
    except KeyboardInterrupt:
        # Ctrl-C is the documented way to stop tracing; exit quietly.
        exit()
680