xref: /aosp_15_r20/external/bcc/tools/tcpretrans.py (revision 387f9dfdfa2baef462e92476d413c7bc2470293e)
1#!/usr/bin/env python
2# @lint-avoid-python-3-compatibility-imports
3#
4# tcpretrans    Trace or count TCP retransmits and TLPs.
5#               For Linux, uses BCC, eBPF. Embedded C.
6#
# USAGE: tcpretrans [-c] [-h] [-l] [-s] [-4 | -6]
8#
9# This uses dynamic tracing of kernel functions, and will need to be updated
10# to match kernel changes.
11#
12# Copyright 2016 Netflix, Inc.
13# Licensed under the Apache License, Version 2.0 (the "License")
14#
15# 14-Feb-2016   Brendan Gregg   Created this.
16# 03-Nov-2017   Matthias Tafelmeier Extended this.
17
18from __future__ import print_function
19from bcc import BPF
20import argparse
21from time import strftime
22from socket import inet_ntop, AF_INET, AF_INET6
23from struct import pack
24from time import sleep
25
26# arguments
# Epilog shown by --help; one example per user-visible option.
examples = """examples:
    ./tcpretrans           # trace TCP retransmits
    ./tcpretrans -l        # include TLP attempts
    ./tcpretrans -s        # display TCP sequence numbers
    ./tcpretrans -c        # count retransmits per flow
    ./tcpretrans -4        # trace IPv4 family only
    ./tcpretrans -6        # trace IPv6 family only
"""
# Command-line interface. Options are registered in the order they should
# appear in the --help output.
parser = argparse.ArgumentParser(
    description="Trace TCP retransmits",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)

parser.add_argument(
    "-s", "--sequence",
    action="store_true",
    help="display TCP sequence numbers")
parser.add_argument(
    "-l", "--lossprobe",
    action="store_true",
    help="include tail loss probe attempts")
parser.add_argument(
    "-c", "--count",
    action="store_true",
    help="count occurred retransmits per flow")

# -4 and -6 cannot be combined.
group = parser.add_mutually_exclusive_group()
group.add_argument(
    "-4", "--ipv4",
    action="store_true",
    help="trace IPv4 family only")
group.add_argument(
    "-6", "--ipv6",
    action="store_true",
    help="trace IPv6 family only")

# Hidden option: print the generated BPF program and exit.
parser.add_argument(
    "--ebpf",
    action="store_true",
    help=argparse.SUPPRESS)

args = parser.parse_args()

# Set to a non-zero value to print the generated BPF program before loading.
debug = 0
52
# define BPF program
#
# Common C prologue shared by every probe variant. It provides:
#   * the RETRANSMIT / TLP event type codes (mirrored by the `type` table
#     used when printing events),
#   * one perf-output record layout and channel per address family
#     (ipv4_events / ipv6_events), used in trace mode,
#   * one flow key layout and hash map per address family
#     (ipv4_count / ipv6_count), used in count (-c) mode.
# The probe bodies are appended to this string further down.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <bcc/proto.h>

#define RETRANSMIT  1
#define TLP         2

// separate data structs for ipv4 and ipv6
struct ipv4_data_t {
    u32 pid;
    u64 ip;
    u32 seq;
    u32 saddr;
    u32 daddr;
    u16 lport;
    u16 dport;
    u64 state;
    u64 type;
};
BPF_PERF_OUTPUT(ipv4_events);

struct ipv6_data_t {
    u32 pid;
    u32 seq;
    u64 ip;
    unsigned __int128 saddr;
    unsigned __int128 daddr;
    u16 lport;
    u16 dport;
    u64 state;
    u64 type;
};
BPF_PERF_OUTPUT(ipv6_events);

// separate flow keys per address family
struct ipv4_flow_key_t {
    u32 saddr;
    u32 daddr;
    u16 lport;
    u16 dport;
};
BPF_HASH(ipv4_count, struct ipv4_flow_key_t);

struct ipv6_flow_key_t {
    unsigned __int128 saddr;
    unsigned __int128 daddr;
    u16 lport;
    u16 dport;
};
BPF_HASH(ipv6_count, struct ipv6_flow_key_t);
"""
107
# Shared handler used by the kprobe entry points (trace_retransmit and
# trace_tlp). It reads family, ports, state and, when an skb is given, the
# sequence number from the socket.
#
# FILTER_FAMILY, IPV4_INIT / IPV4_CORE and IPV6_INIT / IPV6_CORE are not C:
# they are placeholders substituted with generated code further down, before
# the program is handed to BPF().
bpf_text_kprobe = """
static int trace_event(struct pt_regs *ctx, struct sock *skp, struct sk_buff *skb, int type)
{
    struct tcp_skb_cb *tcb;
    u32 seq;

    if (skp == NULL)
        return 0;
    u32 pid = bpf_get_current_pid_tgid() >> 32;

    // pull in details
    u16 family = skp->__sk_common.skc_family;
    u16 lport = skp->__sk_common.skc_num;
    u16 dport = skp->__sk_common.skc_dport;
    char state = skp->__sk_common.skc_state;

    seq = 0;
    if (skb) {
        /* macro TCP_SKB_CB from net/tcp.h */
        tcb = ((struct tcp_skb_cb *)&((skb)->cb[0]));
        seq = tcb->seq;
    }

    FILTER_FAMILY

    if (family == AF_INET) {
        IPV4_INIT
        IPV4_CORE
    } else if (family == AF_INET6) {
        IPV6_INIT
        IPV6_CORE
    }
    // else drop

    return 0;
}
"""
145
# kprobe entry point for retransmits. It forwards to trace_event() with the
# RETRANSMIT type code. Only added to the program (and attached to
# tcp_retransmit_skb) when the tcp:tcp_retransmit_skb tracepoint is missing.
bpf_text_kprobe_retransmit = """
int trace_retransmit(struct pt_regs *ctx, struct sock *sk, struct sk_buff *skb)
{
    trace_event(ctx, sk, skb, RETRANSMIT);
    return 0;
}
"""
153
# kprobe entry point for tail loss probes. It forwards to trace_event() with
# the TLP type code and no skb, so the reported sequence number is 0.
# Only added to the program (and attached to tcp_send_loss_probe) when
# -l/--lossprobe is given.
bpf_text_kprobe_tlp = """
int trace_tlp(struct pt_regs *ctx, struct sock *sk)
{
    trace_event(ctx, sk, NULL, TLP);
    return 0;
}
"""
161
# Handler for the tcp:tcp_retransmit_skb tracepoint, used instead of the
# tcp_retransmit_skb kprobe whenever the running kernel exposes it.
# Ports are taken from the tracepoint arguments; state and family are read
# from the socket.
#
# FILTER_FAMILY, IPV4_CODE and IPV6_CODE are placeholders substituted with
# generated code further down (IPV*_CODE come from struct_init_tracepoint).
bpf_text_tracepoint = """
TRACEPOINT_PROBE(tcp, tcp_retransmit_skb)
{
    struct tcp_skb_cb *tcb;
    u32 seq;

    u32 pid = bpf_get_current_pid_tgid() >> 32;
    const struct sock *skp = (const struct sock *)args->skaddr;
    const struct sk_buff *skb = (const struct sk_buff *)args->skbaddr;
    u16 lport = args->sport;
    u16 dport = args->dport;
    char state = skp->__sk_common.skc_state;
    u16 family = skp->__sk_common.skc_family;

    seq = 0;
    if (skb) {
        /* macro TCP_SKB_CB from net/tcp.h */
        tcb = ((struct tcp_skb_cb *)&((skb)->cb[0]));
        seq = tcb->seq;
    }

    FILTER_FAMILY

    if (family == AF_INET) {
        IPV4_CODE
    } else if (family == AF_INET6) {
        IPV6_CODE
    }
    return 0;
}
"""
193
# C snippets substituted for the IPV4_INIT / IPV6_INIT placeholders in the
# kprobe handler (bpf_text_kprobe). Indexed as struct_init[family][mode]:
#   family: 'ipv4' | 'ipv6'
#   mode:   'count' fills a flow key for the per-flow counter (-c),
#           'trace' fills a per-event record for the perf output.
# The snippets only initialise the struct; the matching IPV*_CORE
# replacement performs the map increment or perf_submit.
struct_init = { 'ipv4':
        { 'count' :
            """
               struct ipv4_flow_key_t flow_key = {};
               flow_key.saddr = skp->__sk_common.skc_rcv_saddr;
               flow_key.daddr = skp->__sk_common.skc_daddr;
               // lport is host order
               flow_key.lport = lport;
               flow_key.dport = ntohs(dport);""",
               # per-event record for IPv4 trace mode
               'trace' :
               """
               struct ipv4_data_t data4 = {};
               data4.pid = pid;
               data4.ip = 4;
               data4.seq = seq;
               data4.type = type;
               data4.saddr = skp->__sk_common.skc_rcv_saddr;
               data4.daddr = skp->__sk_common.skc_daddr;
               // lport is host order
               data4.lport = lport;
               data4.dport = ntohs(dport);
               data4.state = state; """
               },
        # IPv6 variants: the 128-bit addresses are copied with
        # bpf_probe_read_kernel() instead of plain assignment.
        'ipv6':
        { 'count' :
            """
                    struct ipv6_flow_key_t flow_key = {};
                    bpf_probe_read_kernel(&flow_key.saddr, sizeof(flow_key.saddr),
                        skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
                    bpf_probe_read_kernel(&flow_key.daddr, sizeof(flow_key.daddr),
                        skp->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
                    // lport is host order
                    flow_key.lport = lport;
                    flow_key.dport = ntohs(dport);""",
          'trace' : """
                    struct ipv6_data_t data6 = {};
                    data6.pid = pid;
                    data6.ip = 6;
                    data6.seq = seq;
                    data6.type = type;
                    bpf_probe_read_kernel(&data6.saddr, sizeof(data6.saddr),
                        skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
                    bpf_probe_read_kernel(&data6.daddr, sizeof(data6.daddr),
                        skp->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
                    // lport is host order
                    data6.lport = lport;
                    data6.dport = ntohs(dport);
                    data6.state = state;"""
                }
        }
244
# C snippets substituted for the IPV4_CODE / IPV6_CODE placeholders in the
# tracepoint handler (bpf_text_tracepoint). Indexed as
# struct_init_tracepoint[family][mode], like struct_init.
# Unlike the kprobe snippets these are complete: each one both fills the
# struct and performs the map increment ('count') or perf_submit ('trace').
# Addresses are copied from the tracepoint arguments, and dport is used
# as-is (no ntohs).
struct_init_tracepoint = { 'ipv4':
        { 'count' : """
               struct ipv4_flow_key_t flow_key = {};
               __builtin_memcpy(&flow_key.saddr, args->saddr, sizeof(flow_key.saddr));
               __builtin_memcpy(&flow_key.daddr, args->daddr, sizeof(flow_key.daddr));
               flow_key.lport = lport;
               flow_key.dport = dport;
               ipv4_count.increment(flow_key);
               """,
          # the tracepoint only fires for retransmits, so the type is fixed
          'trace' : """
               struct ipv4_data_t data4 = {};
               data4.pid = pid;
               data4.lport = lport;
               data4.dport = dport;
               data4.type = RETRANSMIT;
               data4.ip = 4;
               data4.seq = seq;
               data4.state = state;
               __builtin_memcpy(&data4.saddr, args->saddr, sizeof(data4.saddr));
               __builtin_memcpy(&data4.daddr, args->daddr, sizeof(data4.daddr));
               ipv4_events.perf_submit(args, &data4, sizeof(data4));
               """
               },
        # IPv6 variants read the saddr_v6 / daddr_v6 tracepoint fields.
        'ipv6':
        { 'count' : """
               struct ipv6_flow_key_t flow_key = {};
               __builtin_memcpy(&flow_key.saddr, args->saddr_v6, sizeof(flow_key.saddr));
               __builtin_memcpy(&flow_key.daddr, args->daddr_v6, sizeof(flow_key.daddr));
               flow_key.lport = lport;
               flow_key.dport = dport;
               ipv6_count.increment(flow_key);
               """,
          'trace' : """
               struct ipv6_data_t data6 = {};
               data6.pid = pid;
               data6.lport = lport;
               data6.dport = dport;
               data6.type = RETRANSMIT;
               data6.ip = 6;
               data6.seq = seq;
               data6.state = state;
               __builtin_memcpy(&data6.saddr, args->saddr_v6, sizeof(data6.saddr));
               __builtin_memcpy(&data6.daddr, args->daddr_v6, sizeof(data6.daddr));
               ipv6_events.perf_submit(args, &data6, sizeof(data6));
               """
               }
        }
292
# Template for the IPV4_CORE / IPV6_CORE placeholder in count mode.
# COUNT_STRUCT is replaced with the map name (ipv4_count or ipv6_count)
# when the BPF program text is assembled.
count_core_base = """
        COUNT_STRUCT.increment(flow_key);
"""
296
# Assemble the final BPF program text from the fragments defined above.
#
# Probe the kernel once so that every decision in this section is based on
# the same answer about the tcp:tcp_retransmit_skb tracepoint.
have_tracepoint = BPF.tracepoint_exists("tcp", "tcp_retransmit_skb")
# Both snippet tables (struct_init, struct_init_tracepoint) use this key.
mode = 'count' if args.count else 'trace'

# Preferred path: the tracepoint handler covers retransmits.
if have_tracepoint:
    bpf_text_tracepoint = bpf_text_tracepoint.replace(
        "IPV4_CODE", struct_init_tracepoint['ipv4'][mode])
    bpf_text_tracepoint = bpf_text_tracepoint.replace(
        "IPV6_CODE", struct_init_tracepoint['ipv6'][mode])
    bpf_text += bpf_text_tracepoint

# The shared kprobe handler is needed for TLP tracing (-l) and as the
# retransmit fallback on kernels without the tracepoint.
if args.lossprobe or not have_tracepoint:
    bpf_text += bpf_text_kprobe
    if args.count:
        ipv4_core = count_core_base.replace("COUNT_STRUCT", 'ipv4_count')
        ipv6_core = count_core_base.replace("COUNT_STRUCT", 'ipv6_count')
    else:
        ipv4_core = "ipv4_events.perf_submit(ctx, &data4, sizeof(data4));"
        ipv6_core = "ipv6_events.perf_submit(ctx, &data6, sizeof(data6));"
    bpf_text = bpf_text.replace("IPV4_INIT", struct_init['ipv4'][mode])
    bpf_text = bpf_text.replace("IPV6_INIT", struct_init['ipv6'][mode])
    bpf_text = bpf_text.replace("IPV4_CORE", ipv4_core)
    bpf_text = bpf_text.replace("IPV6_CORE", ipv6_core)
    if args.lossprobe:
        bpf_text += bpf_text_kprobe_tlp
    if not have_tracepoint:
        bpf_text += bpf_text_kprobe_retransmit

# Optional address-family filter (-4 / -6); removed entirely otherwise.
if args.ipv4:
    family_filter = 'if (family != AF_INET) { return 0; }'
elif args.ipv6:
    family_filter = 'if (family != AF_INET6) { return 0; }'
else:
    family_filter = ''
bpf_text = bpf_text.replace('FILTER_FAMILY', family_filter)

# --ebpf dumps the generated program and stops before anything is loaded.
if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()
334
# Event type codes -> one-letter tag printed in the "T" column.
# Must match the RETRANSMIT / TLP #defines in bpf_text.
# NOTE: this name shadows the builtin type(); it is kept because the event
# printers look it up under this name.
type = {
    1: 'R',
    2: 'L',
}

# TCP socket state numbers -> names, from include/net/tcp_states.h.
tcpstate = {
    1: 'ESTABLISHED',
    2: 'SYN_SENT',
    3: 'SYN_RECV',
    4: 'FIN_WAIT1',
    5: 'FIN_WAIT2',
    6: 'TIME_WAIT',
    7: 'CLOSE',
    8: 'CLOSE_WAIT',
    9: 'LAST_ACK',
    10: 'LISTEN',
    11: 'CLOSING',
    12: 'NEW_SYN_RECV',
}
354
355# process event
def print_ipv4_event(cpu, data, size):
    """Perf-buffer callback: print one IPv4 retransmit/TLP event.

    cpu and size are supplied by the perf buffer and are not used; data is
    the raw record, decoded through the ipv4_events table.
    """
    ev = b["ipv4_events"].event(data)

    timestamp = strftime("%H:%M:%S")
    # Addresses are u32 values; repack them to the 4 raw bytes inet_ntop wants.
    local = "%s:%d" % (inet_ntop(AF_INET, pack('I', ev.saddr)), ev.lport)
    tag = type[ev.type]
    remote = "%s:%s" % (inet_ntop(AF_INET, pack('I', ev.daddr)), ev.dport)

    # The state (and optional sequence number) completes the same line.
    print("%-8s %-7d %-2d %-20s %1s> %-20s" % (
        timestamp, ev.pid, ev.ip, local, tag, remote), end='')

    state_name = tcpstate[ev.state]
    if args.sequence:
        tail = " %-12s %s" % (state_name, ev.seq)
    else:
        tail = " %s" % (state_name)
    print(tail)
368
def print_ipv6_event(cpu, data, size):
    """Perf-buffer callback: print one IPv6 retransmit/TLP event.

    cpu and size are supplied by the perf buffer and are not used; data is
    the raw record, decoded through the ipv6_events table.
    """
    ev = b["ipv6_events"].event(data)

    timestamp = strftime("%H:%M:%S")
    # The address fields are handed to inet_ntop unchanged.
    local = "%s:%d" % (inet_ntop(AF_INET6, ev.saddr), ev.lport)
    tag = type[ev.type]
    remote = "%s:%d" % (inet_ntop(AF_INET6, ev.daddr), ev.dport)

    # The state (and optional sequence number) completes the same line.
    print("%-8s %-7d %-2d %-20s %1s> %-20s" % (
        timestamp, ev.pid, ev.ip, local, tag, remote), end='')

    state_name = tcpstate[ev.state]
    if args.sequence:
        tail = " %-12s %s" % (state_name, ev.seq)
    else:
        tail = " %s" % (state_name)
    print(tail)
381
def depict_cnt(counts_tab, l3prot='ipv4'):
    """Print one line per flow in counts_tab, least-retransmitted first.

    counts_tab -- mapping of flow key (saddr, daddr, lport, dport attributes)
                  to a counter object exposing .value
    l3prot     -- 'ipv4' selects IPv4 address formatting; any other value
                  is treated as IPv6
    """
    is_v4 = l3prot == 'ipv4'
    family = AF_INET if is_v4 else AF_INET6

    def endpoint(addr, port):
        # IPv4 addresses are u32 values and must be repacked to raw bytes;
        # IPv6 addresses are passed to inet_ntop as they are.
        raw = pack('I', addr) if is_v4 else addr
        return "[%s]#%d" % (inet_ntop(family, raw), port)

    ranked = sorted(counts_tab.items(), key=lambda entry: entry[1].value)
    for flow, hits in ranked:
        print("%-20s <-> %-20s %10d" % (
            endpoint(flow.saddr, flow.lport),
            endpoint(flow.daddr, flow.dport),
            hits.value))
394
# initialize BPF
b = BPF(text=bpf_text)
# The tcp_retransmit_skb kprobe is only the fallback for kernels without the
# tcp:tcp_retransmit_skb tracepoint; trace_retransmit is compiled into the
# program under the same condition.
if not BPF.tracepoint_exists("tcp", "tcp_retransmit_skb"):
    b.attach_kprobe(event="tcp_retransmit_skb", fn_name="trace_retransmit")
if args.lossprobe:
    b.attach_kprobe(event="tcp_send_loss_probe", fn_name="trace_tlp")

print("Tracing retransmits ... Hit Ctrl-C to end")
if args.count:
    # Counting happens in the BPF maps; idle here until the user hits Ctrl-C.
    # Only KeyboardInterrupt is treated as "done" so that any other failure
    # surfaces instead of being silently swallowed.
    try:
        while 1:
            sleep(99999999)
    except KeyboardInterrupt:
        pass

    # header
    print("\n%-25s %-25s %-10s" % (
        "LADDR:LPORT", "RADDR:RPORT", "RETRANSMITS"))
    depict_cnt(b.get_table("ipv4_count"))
    depict_cnt(b.get_table("ipv6_count"), l3prot='ipv6')
# read events
else:
    # header
    print("%-8s %-7s %-2s %-20s %1s> %-20s" % ("TIME", "PID", "IP",
        "LADDR:LPORT", "T", "RADDR:RPORT"), end='')
    if args.sequence:
        print(" %-12s %-10s" % ("STATE", "SEQ"))
    else:
        print(" %-4s" % ("STATE"))
    # One perf buffer per address family, each with its own printer.
    b["ipv4_events"].open_perf_buffer(print_ipv4_event)
    b["ipv6_events"].open_perf_buffer(print_ipv6_event)
    while 1:
        try:
            b.perf_buffer_poll()
        except KeyboardInterrupt:
            exit()
431