xref: /aosp_15_r20/external/bcc/tools/tcpstates.py (revision 387f9dfdfa2baef462e92476d413c7bc2470293e)
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# @lint-avoid-python-3-compatibility-imports
4#
5# tcpstates   Trace the TCP session state changes with durations.
6#             For Linux, uses BCC, BPF. Embedded C.
7#
# USAGE: tcpstates [-h] [-T] [-t] [-w] [-s] [-L LOCALPORT] [-D REMOTEPORT]
#                  [-Y] [-4 | -6]
9#
10# This uses the sock:inet_sock_set_state tracepoint, added to Linux 4.16.
11# Linux 4.16 also adds more state transitions so that they can be traced.
12#
13# Copyright 2018 Netflix, Inc.
14# Licensed under the Apache License, Version 2.0 (the "License")
15#
16# 20-Mar-2018   Brendan Gregg   Created this.
17
18from __future__ import print_function
19from bcc import BPF
20import argparse
21from socket import inet_ntop, AF_INET, AF_INET6
22from time import strftime, time
23from os import getuid
24
# arguments
#
# Command-line interface. Options are registered in the order they should
# appear in --help; the parsed result is kept in the module-level `args`.

# Epilog text appended to the --help output.
examples = """examples:
    ./tcpstates           # trace all TCP state changes
    ./tcpstates -t        # include timestamp column
    ./tcpstates -T        # include time column (HH:MM:SS)
    ./tcpstates -w        # wider columns (fit IPv6)
    ./tcpstates -stT      # csv output, with times & timestamps
    ./tcpstates -Y        # log events to the systemd journal
    ./tcpstates -L 80     # only trace local port 80
    ./tcpstates -L 80,81  # only trace local ports 80 and 81
    ./tcpstates -D 80     # only trace remote port 80
    ./tcpstates -4        # trace IPv4 family only
    ./tcpstates -6        # trace IPv6 family only
"""
parser = argparse.ArgumentParser(
    description="Trace TCP session state changes and durations",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
# Output-shaping flags.
parser.add_argument("-T", "--time", action="store_true",
    help="include time column on output (HH:MM:SS)")
parser.add_argument("-t", "--timestamp", action="store_true",
    help="include timestamp on output (seconds)")
parser.add_argument("-w", "--wide", action="store_true",
    help="wide column output (fits IPv6 addresses)")
parser.add_argument("-s", "--csv", action="store_true",
    help="comma separated values output")
# Port filters: each value is a comma-separated string that is later split,
# converted with int() and compiled into the BPF program as a C condition.
parser.add_argument("-L", "--localport",
    help="comma-separated list of local ports to trace.")
parser.add_argument("-D", "--remoteport",
    help="comma-separated list of remote ports to trace.")
# Hidden option (suppressed from --help): when set, the generated BPF
# program text is printed and the script exits without tracing.
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)
parser.add_argument("-Y", "--journal", action="store_true",
    help="log session state changes to the systemd journal")
# -4 and -6 cannot be combined; giving neither traces both families.
group = parser.add_mutually_exclusive_group()
group.add_argument("-4", "--ipv4", action="store_true",
    help="trace IPv4 family only")
group.add_argument("-6", "--ipv6", action="store_true",
    help="trace IPv6 family only")
args = parser.parse_args()
# Developer switch: a non-zero value prints the generated BPF program text
# before it is loaded.
debug = 0
66
# define BPF program
#
# Preamble shared by both probe variants. It declares:
#   - `last`: a hash keyed by struct sock * holding a u64 timestamp; the
#     probes store bpf_ktime_get_ns() there on every state change so the
#     next change on the same socket can report the elapsed time.
#   - ipv4_data_t / ipv6_data_t: the per-event records sent to user space
#     through the ipv4_events / ipv6_events perf outputs. The two structs
#     differ only in the length of the saddr/daddr arrays (1 vs 4 u32
#     words); on the Python side those arrays are handed to inet_ntop().
bpf_header = """
#include <uapi/linux/ptrace.h>
#include <linux/tcp.h>
#include <net/sock.h>
#include <bcc/proto.h>

BPF_HASH(last, struct sock *, u64);

// separate data structs for ipv4 and ipv6
struct ipv4_data_t {
    u64 ts_us;
    u64 skaddr;
    u32 saddr[1];
    u32 daddr[1];
    u64 span_us;
    u32 pid;
    u16 lport;
    u16 dport;
    int oldstate;
    int newstate;
    char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(ipv4_events);

struct ipv6_data_t {
    u64 ts_us;
    u64 skaddr;
    u32 saddr[4];
    u32 daddr[4];
    u64 span_us;
    u32 pid;
    u16 lport;
    u16 dport;
    int oldstate;
    int newstate;
    char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(ipv6_events);
"""
107
# Probe variant attached to the sock:inet_sock_set_state tracepoint; it is
# the one selected when BPF.tracepoint_exists() reports that tracepoint.
#
# Events whose protocol is not IPPROTO_TCP are ignored. FILTER_LPORT,
# FILTER_DPORT and FILTER_FAMILY are placeholders: the "code substitutions"
# step replaces each with a C filter statement, or with nothing when the
# matching command-line option was not given.
#
# For every reported transition the probe emits one ipv4/ipv6 record whose
# span_us is the time since the previous transition recorded for the same
# socket (0 if none is recorded). The `last` entry is deleted when the new
# state is TCP_CLOSE and refreshed otherwise.
bpf_text_tracepoint = """
TRACEPOINT_PROBE(sock, inet_sock_set_state)
{
    if (args->protocol != IPPROTO_TCP)
        return 0;

    u32 pid = bpf_get_current_pid_tgid() >> 32;
    // sk is used as a UUID
    struct sock *sk = (struct sock *)args->skaddr;

    // lport is either used in a filter here, or later
    u16 lport = args->sport;
    FILTER_LPORT

    // dport is either used in a filter here, or later
    u16 dport = args->dport;
    FILTER_DPORT

    // calculate delta
    u64 *tsp, delta_us;
    tsp = last.lookup(&sk);
    if (tsp == 0)
        delta_us = 0;
    else
        delta_us = (bpf_ktime_get_ns() - *tsp) / 1000;
    u16 family = args->family;
    FILTER_FAMILY

    // workaround to avoid llvm optimization which will cause context ptr args modified
    int tcp_newstate = args->newstate;

    if (args->family == AF_INET) {
        struct ipv4_data_t data4 = {
            .span_us = delta_us,
            .oldstate = args->oldstate,
            .newstate = args->newstate };
        data4.skaddr = (u64)args->skaddr;
        data4.ts_us = bpf_ktime_get_ns() / 1000;
        __builtin_memcpy(&data4.saddr, args->saddr, sizeof(data4.saddr));
        __builtin_memcpy(&data4.daddr, args->daddr, sizeof(data4.daddr));
        data4.lport = lport;
        data4.dport = dport;
        data4.pid = pid;

        bpf_get_current_comm(&data4.task, sizeof(data4.task));
        ipv4_events.perf_submit(args, &data4, sizeof(data4));

    } else /* 6 */ {
        struct ipv6_data_t data6 = {
            .span_us = delta_us,
            .oldstate = args->oldstate,
            .newstate = args->newstate };
        data6.skaddr = (u64)args->skaddr;
        data6.ts_us = bpf_ktime_get_ns() / 1000;
        __builtin_memcpy(&data6.saddr, args->saddr_v6, sizeof(data6.saddr));
        __builtin_memcpy(&data6.daddr, args->daddr_v6, sizeof(data6.daddr));
        data6.lport = lport;
        data6.dport = dport;
        data6.pid = pid;
        bpf_get_current_comm(&data6.task, sizeof(data6.task));
        ipv6_events.perf_submit(args, &data6, sizeof(data6));
    }

    if (tcp_newstate == TCP_CLOSE) {
        last.delete(&sk);
    } else {
        u64 ts = bpf_ktime_get_ns();
        last.update(&sk, &ts);
    }

    return 0;
}
"""
181
# Fallback probe variant: a kprobe on tcp_set_state(), used when the
# sock:inet_sock_set_state tracepoint is not available.
#
# It mirrors the tracepoint variant but reads everything from struct sock:
# the old state is the socket's current skc_state, the new state is the
# function argument, and the remote port is converted with ntohs().
# FILTER_LPORT, FILTER_DPORT and FILTER_FAMILY are placeholders replaced by
# the "code substitutions" step.
#
# NOTE: ipv4_data_t declares saddr/daddr as one-element u32 arrays, so the
# IPv4 addresses must be stored into element [0]; assigning to the array
# itself does not compile.
bpf_text_kprobe = """
int kprobe__tcp_set_state(struct pt_regs *ctx, struct sock *sk, int state)
{
    u32 pid = bpf_get_current_pid_tgid() >> 32;
    // sk is used as a UUID

    // lport is either used in a filter here, or later
    u16 lport = sk->__sk_common.skc_num;
    FILTER_LPORT

    // dport is either used in a filter here, or later
    u16 dport = sk->__sk_common.skc_dport;
    dport = ntohs(dport);
    FILTER_DPORT

    // calculate delta
    u64 *tsp, delta_us;
    tsp = last.lookup(&sk);
    if (tsp == 0)
        delta_us = 0;
    else
        delta_us = (bpf_ktime_get_ns() - *tsp) / 1000;

    u16 family = sk->__sk_common.skc_family;
    FILTER_FAMILY

    if (family == AF_INET) {
        struct ipv4_data_t data4 = {
            .span_us = delta_us,
            .oldstate = sk->__sk_common.skc_state,
            .newstate = state };
        data4.skaddr = (u64)sk;
        data4.ts_us = bpf_ktime_get_ns() / 1000;
        data4.saddr[0] = sk->__sk_common.skc_rcv_saddr;
        data4.daddr[0] = sk->__sk_common.skc_daddr;
        data4.lport = lport;
        data4.dport = dport;
        data4.pid = pid;

        bpf_get_current_comm(&data4.task, sizeof(data4.task));
        ipv4_events.perf_submit(ctx, &data4, sizeof(data4));

    } else /* 6 */ {
        struct ipv6_data_t data6 = {
            .span_us = delta_us,
            .oldstate = sk->__sk_common.skc_state,
            .newstate = state };
        data6.skaddr = (u64)sk;
        data6.ts_us = bpf_ktime_get_ns() / 1000;
        bpf_probe_read_kernel(&data6.saddr, sizeof(data6.saddr),
            sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
        bpf_probe_read_kernel(&data6.daddr, sizeof(data6.daddr),
            sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
        data6.lport = lport;
        data6.dport = dport;
        data6.pid = pid;
        bpf_get_current_comm(&data6.task, sizeof(data6.task));
        ipv6_events.perf_submit(ctx, &data6, sizeof(data6));
    }

    if (state == TCP_CLOSE) {
        last.delete(&sk);
    } else {
        u64 ts = bpf_ktime_get_ns();
        last.update(&sk, &ts);
    }

    return 0;

};
"""
253
# Assemble the program: shared header plus exactly one probe variant. The
# tracepoint is preferred; the kprobe is the fallback when it is missing.
if BPF.tracepoint_exists("sock", "inet_sock_set_state"):
    bpf_text = bpf_header + bpf_text_tracepoint
else:
    bpf_text = bpf_header + bpf_text_kprobe

# code substitutions
#
# Port filters: a socket is skipped (and forgotten in `last`) when its port
# matches none of the requested ones. Remote ports are handled first, then
# local ports.
for placeholder, clause_fmt, port_list in (
        ('FILTER_DPORT', 'dport != %d', args.remoteport),
        ('FILTER_LPORT', 'lport != %d', args.localport)):
    if not port_list:
        continue
    ports = [int(port) for port in port_list.split(',')]
    condition = ' && '.join([clause_fmt % port for port in ports])
    bpf_text = bpf_text.replace(placeholder,
        'if (%s) { last.delete(&sk); return 0; }' % condition)

# Address-family filter: -4 and -6 are mutually exclusive; with neither
# the placeholder is simply removed.
if args.ipv4:
    family_filter = 'if (family != AF_INET) { return 0; }'
elif args.ipv6:
    family_filter = 'if (family != AF_INET6) { return 0; }'
else:
    family_filter = ''
bpf_text = bpf_text.replace('FILTER_FAMILY', family_filter)

# Strip any port placeholder that was not substituted above.
for leftover in ('FILTER_DPORT', 'FILTER_LPORT'):
    bpf_text = bpf_text.replace(leftover, '')

# Optionally dump the final program; --ebpf stops here without tracing.
if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()
285
#
# Setup output formats
#
# Don't change the default output (next 2 lines): this fits in 80 chars. I
# know it doesn't have NS or UIDs etc. I know. If you really, really, really
# need to add columns, columns that solve real actual problems, I'd start by
# adding an extended mode (-x) to include those columns.
#
header_string = "%-16s %-5s %-10.10s %s%-15s %-5s %-15s %-5s %-11s -> %-11s %s"
format_string = ("%-16x %-5d %-10.10s %s%-15s %-5d %-15s %-5d %-11s "
    "-> %-11s %.3f")
# csv wins over wide when both are requested.
if args.csv:
    header_string = "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s"
    format_string = "%x,%d,%s,%s,%s,%s,%s,%d,%s,%s,%.3f"
elif args.wide:
    header_string = ("%-16s %-5s %-16.16s %-2s %-39s %-5s %-39s %-5s %-11s "
        "-> %-11s %s")
    format_string = ("%-16x %-5d %-16.16s %-2s %-39s %-5s %-39s %-5d %-11s "
        "-> %-11s %.3f")

# The systemd bindings are only needed (and only imported) for -Y.
if args.journal:
    try:
        from systemd import journal
    except ImportError:
        print("ERROR: Journal logging requires the systemd.journal module")
        exit(1)
312
313
def tcpstate2str(state):
    """Return the symbolic name of a TCP state number.

    Numbers without a known name are returned as their str() form.
    """
    # from include/net/tcp_states.h (numbering starts at 1):
    names = (
        "ESTABLISHED",
        "SYN_SENT",
        "SYN_RECV",
        "FIN_WAIT1",
        "FIN_WAIT2",
        "TIME_WAIT",
        "CLOSE",
        "CLOSE_WAIT",
        "LAST_ACK",
        "LISTEN",
        "CLOSING",
        "NEW_SYN_RECV",
    )
    by_number = dict(enumerate(names, 1))
    return by_number.get(state, str(state))
335
def journal_fields(event, addr_family):
    """Build the journal.send() keyword arguments for one state change.

    event is a decoded ipv4/ipv6 perf record. addr_family (AF_INET or
    AF_INET6) selects how the addresses are rendered and whether the custom
    address fields are named ...IPV4... or ...IPV6....

    Returns a dict of journal fields, including a human-readable MESSAGE.
    """
    addr_pfx = 'IPV6' if addr_family == AF_INET6 else 'IPV4'
    src_addr_key = 'OBJECT_' + addr_pfx + '_SOURCE_ADDRESS'
    dst_addr_key = 'OBJECT_' + addr_pfx + '_DESTINATION_ADDRESS'

    # Standard fields described in systemd.journal-fields(7). journal.send
    # will fill in CODE_LINE, CODE_FILE, and CODE_FUNC for us. If we're
    # root and specify OBJECT_PID, systemd-journald will add other OBJECT_*
    # fields for us.
    fields = {
        'SYSLOG_IDENTIFIER': 'tcpstates',
        'PRIORITY': 5,
        '_SOURCE_REALTIME_TIMESTAMP': time() * 1000000,
        'OBJECT_PID': str(event.pid),
        'OBJECT_COMM': event.task.decode('utf-8', 'replace'),
    }
    # Custom fields, aka "stuff we sort of made up".
    fields[src_addr_key] = inet_ntop(addr_family, event.saddr)
    fields['OBJECT_TCP_SOURCE_PORT'] = str(event.lport)
    fields[dst_addr_key] = inet_ntop(addr_family, event.daddr)
    fields['OBJECT_TCP_DESTINATION_PORT'] = str(event.dport)
    fields['OBJECT_TCP_OLD_STATE'] = tcpstate2str(event.oldstate)
    fields['OBJECT_TCP_NEW_STATE'] = tcpstate2str(event.newstate)
    fields['OBJECT_TCP_SPAN_TIME'] = str(event.span_us)

    # "<comm> <src> <sport> → <dst> <dport> <old> → <new>", rendered from the
    # field values collected above.
    message_template = u" ".join([
        u"%(OBJECT_COMM)s",
        u"%(" + src_addr_key + u")s",
        u"%(OBJECT_TCP_SOURCE_PORT)s",
        u"→",
        u"%(" + dst_addr_key + u")s",
        u"%(OBJECT_TCP_DESTINATION_PORT)s",
        u"%(OBJECT_TCP_OLD_STATE)s",
        u"→",
        u"%(OBJECT_TCP_NEW_STATE)s",
    ])
    fields['MESSAGE'] = message_template % fields

    # OBJECT_COMM is only needed for MESSAGE when running as root.
    if getuid() == 0:
        fields.pop('OBJECT_COMM')  # Handled by systemd-journald

    return fields
373
# process event
def print_event(event, addr_family):
    """Print one state-change record, optionally mirroring it to the journal.

    event is a decoded ipv4/ipv6 perf record and addr_family (AF_INET or
    AF_INET6) tells how to render its addresses. Optional TIME / TIME(s)
    prefixes are emitted first, according to -T and -t.
    """
    global start_ts

    if args.time:
        clock_fmt = "%s," if args.csv else "%-8s "
        print(clock_fmt % strftime("%H:%M:%S"), end="")

    if args.timestamp:
        # Timestamps are relative to the first event that gets printed.
        if start_ts == 0:
            start_ts = event.ts_us
        delta_s = (float(event.ts_us) - start_ts) / 1000000
        stamp_fmt = "%.6f," if args.csv else "%-9.6f "
        print(stamp_fmt % delta_s, end="")

    version = "4" if addr_family == AF_INET else "6"
    # The IP-version column only exists in wide and csv layouts.
    ip_column = version if args.wide or args.csv else ""
    row = (
        event.skaddr,
        event.pid,
        event.task.decode('utf-8', 'replace'),
        ip_column,
        inet_ntop(addr_family, event.saddr),
        event.lport,
        inet_ntop(addr_family, event.daddr),
        event.dport,
        tcpstate2str(event.oldstate),
        tcpstate2str(event.newstate),
        float(event.span_us) / 1000,  # microseconds -> milliseconds
    )
    print(format_string % row)

    if args.journal:
        journal.send(**journal_fields(event, addr_family))
402
def print_ipv4_event(cpu, data, size):
    """Perf-buffer callback: decode an ipv4_events record and print it."""
    print_event(b["ipv4_events"].event(data), AF_INET)
406
def print_ipv6_event(cpu, data, size):
    """Perf-buffer callback: decode an ipv6_events record and print it."""
    print_event(b["ipv6_events"].event(data), AF_INET6)
410
# initialize BPF
b = BPF(text=bpf_text)

# header
# Optional prefix columns come first, in the same order print_event() emits
# them, followed by the fixed column titles.
if args.time:
    print(("%s," if args.csv else "%-8s ") % ("TIME"), end="")
if args.timestamp:
    print(("%s," if args.csv else "%-9s ") % ("TIME(s)"), end="")
column_titles = (
    "SKADDR", "C-PID", "C-COMM",
    "IP" if args.wide or args.csv else "",
    "LADDR", "LPORT", "RADDR", "RPORT",
    "OLDSTATE", "NEWSTATE", "MS",
)
print(header_string % column_titles)

# Timestamp (us) of the first printed event; 0 means "not seen yet".
start_ts = 0

# read events
b["ipv4_events"].open_perf_buffer(print_ipv4_event, page_cnt=64)
b["ipv6_events"].open_perf_buffer(print_ipv6_event, page_cnt=64)
# Poll until interrupted with Ctrl-C.
while True:
    try:
        b.perf_buffer_poll()
    except KeyboardInterrupt:
        exit()
440