#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @lint-avoid-python-3-compatibility-imports
#
# tcpstates   Trace the TCP session state changes with durations.
#             For Linux, uses BCC, BPF. Embedded C.
#
# USAGE: tcpstates [-h] [-T] [-t] [-w] [-s] [-L LOCALPORT] [-D REMOTEPORT]
#                  [-Y] [-4 | -6]
#
# This uses the sock:inet_sock_set_state tracepoint, added to Linux 4.16.
# Linux 4.16 also adds more state transitions so that they can be traced.
#
# Copyright 2018 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 20-Mar-2018   Brendan Gregg   Created this.

from __future__ import print_function
from bcc import BPF
import argparse
from socket import inet_ntop, AF_INET, AF_INET6
from time import strftime, time
from os import getuid

# arguments
examples = """examples:
    ./tcpstates           # trace all TCP state changes
    ./tcpstates -t        # include timestamp column
    ./tcpstates -T        # include time column (HH:MM:SS)
    ./tcpstates -w        # wider columns (fit IPv6)
    ./tcpstates -stT      # csv output, with times & timestamps
    ./tcpstates -Y        # log events to the systemd journal
    ./tcpstates -L 80     # only trace local port 80
    ./tcpstates -L 80,81  # only trace local ports 80 and 81
    ./tcpstates -D 80     # only trace remote port 80
    ./tcpstates -4        # trace IPv4 family only
    ./tcpstates -6        # trace IPv6 family only
"""
parser = argparse.ArgumentParser(
    description="Trace TCP session state changes and durations",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-T", "--time", action="store_true",
    help="include time column on output (HH:MM:SS)")
parser.add_argument("-t", "--timestamp", action="store_true",
    help="include timestamp on output (seconds)")
parser.add_argument("-w", "--wide", action="store_true",
    help="wide column output (fits IPv6 addresses)")
parser.add_argument("-s", "--csv", action="store_true",
    help="comma separated values output")
parser.add_argument("-L", "--localport",
    help="comma-separated list of local ports to trace.")
parser.add_argument("-D", "--remoteport",
    help="comma-separated list of remote ports to trace.")
# --ebpf is hidden from --help: it prints the generated BPF C text and exits.
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)
parser.add_argument("-Y", "--journal", action="store_true",
    help="log session state changes to the systemd journal")
# -4 and -6 cannot be combined.
group = parser.add_mutually_exclusive_group()
group.add_argument("-4", "--ipv4", action="store_true",
    help="trace IPv4 family only")
group.add_argument("-6", "--ipv6", action="store_true",
    help="trace IPv6 family only")
args = parser.parse_args()
# Set to a non-zero value to print the generated BPF C text before loading.
debug = 0

# define BPF program
#
# Shared declarations: the `last` hash stores, per socket, the timestamp of
# its previous state change; one event struct and one perf output per
# address family.
bpf_header = """
#include <uapi/linux/ptrace.h>
#include <linux/tcp.h>
#include <net/sock.h>
#include <bcc/proto.h>

BPF_HASH(last, struct sock *, u64);

// separate data structs for ipv4 and ipv6
struct ipv4_data_t {
    u64 ts_us;
    u64 skaddr;
    u32 saddr[1];
    u32 daddr[1];
    u64 span_us;
    u32 pid;
    u16 lport;
    u16 dport;
    int oldstate;
    int newstate;
    char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(ipv4_events);

struct ipv6_data_t {
    u64 ts_us;
    u64 skaddr;
    u32 saddr[4];
    u32 daddr[4];
    u64 span_us;
    u32 pid;
    u16 lport;
    u16 dport;
    int oldstate;
    int newstate;
    char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(ipv6_events);
"""

# Probe body used when the sock:inet_sock_set_state tracepoint exists.
# FILTER_LPORT, FILTER_DPORT and FILTER_FAMILY are placeholders replaced by
# the "code substitutions" section below.
bpf_text_tracepoint = """
TRACEPOINT_PROBE(sock, inet_sock_set_state)
{
    if (args->protocol != IPPROTO_TCP)
        return 0;

    u32 pid = bpf_get_current_pid_tgid() >> 32;
    // sk is used as a UUID
    struct sock *sk = (struct sock *)args->skaddr;

    // lport is either used in a filter here, or later
    u16 lport = args->sport;
    FILTER_LPORT

    // dport is either used in a filter here, or later
    u16 dport = args->dport;
    FILTER_DPORT

    // calculate delta
    u64 *tsp, delta_us;
    tsp = last.lookup(&sk);
    if (tsp == 0)
        delta_us = 0;
    else
        delta_us = (bpf_ktime_get_ns() - *tsp) / 1000;
    u16 family = args->family;
    FILTER_FAMILY

    // workaround to avoid llvm optimization which will cause context ptr args modified
    int tcp_newstate = args->newstate;

    if (args->family == AF_INET) {
        struct ipv4_data_t data4 = {
            .span_us = delta_us,
            .oldstate = args->oldstate,
            .newstate = args->newstate };
        data4.skaddr = (u64)args->skaddr;
        data4.ts_us = bpf_ktime_get_ns() / 1000;
        __builtin_memcpy(&data4.saddr, args->saddr, sizeof(data4.saddr));
        __builtin_memcpy(&data4.daddr, args->daddr, sizeof(data4.daddr));
        data4.lport = lport;
        data4.dport = dport;
        data4.pid = pid;

        bpf_get_current_comm(&data4.task, sizeof(data4.task));
        ipv4_events.perf_submit(args, &data4, sizeof(data4));

    } else /* 6 */ {
        struct ipv6_data_t data6 = {
            .span_us = delta_us,
            .oldstate = args->oldstate,
            .newstate = args->newstate };
        data6.skaddr = (u64)args->skaddr;
        data6.ts_us = bpf_ktime_get_ns() / 1000;
        __builtin_memcpy(&data6.saddr, args->saddr_v6, sizeof(data6.saddr));
        __builtin_memcpy(&data6.daddr, args->daddr_v6, sizeof(data6.daddr));
        data6.lport = lport;
        data6.dport = dport;
        data6.pid = pid;
        bpf_get_current_comm(&data6.task, sizeof(data6.task));
        ipv6_events.perf_submit(args, &data6, sizeof(data6));
    }

    if (tcp_newstate == TCP_CLOSE) {
        last.delete(&sk);
    } else {
        u64 ts = bpf_ktime_get_ns();
        last.update(&sk, &ts);
    }

    return 0;
}
"""

# Fallback probe body, used when the tracepoint does not exist. It carries
# the same three FILTER_* placeholders as the tracepoint variant.
bpf_text_kprobe = """
int kprobe__tcp_set_state(struct pt_regs *ctx, struct sock *sk, int state)
{
    u32 pid = bpf_get_current_pid_tgid() >> 32;
    // sk is used as a UUID

    // lport is either used in a filter here, or later
    u16 lport = sk->__sk_common.skc_num;
    FILTER_LPORT

    // dport is either used in a filter here, or later
    u16 dport = sk->__sk_common.skc_dport;
    dport = ntohs(dport);
    FILTER_DPORT

    // calculate delta
    u64 *tsp, delta_us;
    tsp = last.lookup(&sk);
    if (tsp == 0)
        delta_us = 0;
    else
        delta_us = (bpf_ktime_get_ns() - *tsp) / 1000;

    u16 family = sk->__sk_common.skc_family;
    FILTER_FAMILY

    if (family == AF_INET) {
        struct ipv4_data_t data4 = {
            .span_us = delta_us,
            .oldstate = sk->__sk_common.skc_state,
            .newstate = state };
        data4.skaddr = (u64)sk;
        data4.ts_us = bpf_ktime_get_ns() / 1000;
        // saddr/daddr are declared as u32[1]; arrays are not assignable
        // in C, so store into element 0.
        data4.saddr[0] = sk->__sk_common.skc_rcv_saddr;
        data4.daddr[0] = sk->__sk_common.skc_daddr;
        data4.lport = lport;
        data4.dport = dport;
        data4.pid = pid;

        bpf_get_current_comm(&data4.task, sizeof(data4.task));
        ipv4_events.perf_submit(ctx, &data4, sizeof(data4));

    } else /* 6 */ {
        struct ipv6_data_t data6 = {
            .span_us = delta_us,
            .oldstate = sk->__sk_common.skc_state,
            .newstate = state };
        data6.skaddr = (u64)sk;
        data6.ts_us = bpf_ktime_get_ns() / 1000;
        bpf_probe_read_kernel(&data6.saddr, sizeof(data6.saddr),
            sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
        bpf_probe_read_kernel(&data6.daddr, sizeof(data6.daddr),
            sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
        data6.lport = lport;
        data6.dport = dport;
        data6.pid = pid;
        bpf_get_current_comm(&data6.task, sizeof(data6.task));
        ipv6_events.perf_submit(ctx, &data6, sizeof(data6));
    }

    if (state == TCP_CLOSE) {
        last.delete(&sk);
    } else {
        u64 ts = bpf_ktime_get_ns();
        last.update(&sk, &ts);
    }

    return 0;

};
"""

# Prefer the tracepoint; fall back to the kprobe when it is unavailable.
bpf_text = bpf_header
if BPF.tracepoint_exists("sock", "inet_sock_set_state"):
    bpf_text += bpf_text_tracepoint
else:
    bpf_text += bpf_text_kprobe

# code substitutions
#
# Port filters: an event whose port matches none of the requested ports is
# dropped, and the socket's entry in `last` is removed. int() ensures that
# only numbers are spliced into the C text.
if args.remoteport:
    dports = [int(dport) for dport in args.remoteport.split(',')]
    dports_if = ' && '.join(['dport != %d' % dport for dport in dports])
    bpf_text = bpf_text.replace('FILTER_DPORT',
        'if (%s) { last.delete(&sk); return 0; }' % dports_if)
if args.localport:
    lports = [int(lport) for lport in args.localport.split(',')]
    lports_if = ' && '.join(['lport != %d' % lport for lport in lports])
    bpf_text = bpf_text.replace('FILTER_LPORT',
        'if (%s) { last.delete(&sk); return 0; }' % lports_if)
if args.ipv4:
    bpf_text = bpf_text.replace('FILTER_FAMILY',
        'if (family != AF_INET) { return 0; }')
elif args.ipv6:
    bpf_text = bpf_text.replace('FILTER_FAMILY',
        'if (family != AF_INET6) { return 0; }')
# Any placeholder that was not requested is removed from the C text.
bpf_text = bpf_text.replace('FILTER_FAMILY', '')
bpf_text = bpf_text.replace('FILTER_DPORT', '')
bpf_text = bpf_text.replace('FILTER_LPORT', '')

if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()

#
# Setup output formats
#
# Don't change the default output (next 2 lines): this fits in 80 chars. I
# know it doesn't have NS or UIDs etc. I know. If you really, really, really
# need to add columns, columns that solve real actual problems, I'd start by
# adding an extended mode (-x) to include those columns.
#
header_string = "%-16s %-5s %-10.10s %s%-15s %-5s %-15s %-5s %-11s -> %-11s %s"
format_string = ("%-16x %-5d %-10.10s %s%-15s %-5d %-15s %-5d %-11s "
                 "-> %-11s %.3f")
if args.csv:
    # CSV layout is used whenever -s is given, even together with -w.
    header_string = "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s"
    format_string = "%x,%d,%s,%s,%s,%s,%s,%d,%s,%s,%.3f"
elif args.wide:
    # Wide layout: 39-character address columns plus an IP version column.
    header_string = ("%-16s %-5s %-16.16s %-2s %-39s %-5s %-39s %-5s %-11s "
                     "-> %-11s %s")
    format_string = ("%-16x %-5d %-16.16s %-2s %-39s %-5s %-39s %-5d %-11s "
                     "-> %-11s %.3f")

if args.journal:
    # The systemd bindings are imported only when -Y is requested.
    try:
        from systemd import journal
    except ImportError:
        print("ERROR: Journal logging requires the systemd.journal module")
        exit(1)


def tcpstate2str(state):
    """Return the name of TCP state number `state`.

    Numbers missing from the table are returned as their decimal string.
    """
    # from include/net/tcp_states.h:
    names = {
        1: "ESTABLISHED",
        2: "SYN_SENT",
        3: "SYN_RECV",
        4: "FIN_WAIT1",
        5: "FIN_WAIT2",
        6: "TIME_WAIT",
        7: "CLOSE",
        8: "CLOSE_WAIT",
        9: "LAST_ACK",
        10: "LISTEN",
        11: "CLOSING",
        12: "NEW_SYN_RECV",
    }
    return names.get(state, str(state))


def journal_fields(event, addr_family):
    """Build the dict of journal fields describing one state-change event.

    `event` is a decoded perf event and `addr_family` is AF_INET or
    AF_INET6. The result includes a human-readable MESSAGE entry; when
    running as root the OBJECT_COMM entry is removed before returning.
    """
    addr_pfx = 'IPV6' if addr_family == AF_INET6 else 'IPV4'
    src_key = 'OBJECT_' + addr_pfx + '_SOURCE_ADDRESS'
    dst_key = 'OBJECT_' + addr_pfx + '_DESTINATION_ADDRESS'

    fields = {
        # Standard fields described in systemd.journal-fields(7). journal.send
        # will fill in CODE_LINE, CODE_FILE, and CODE_FUNC for us. If we're
        # root and specify OBJECT_PID, systemd-journald will add other OBJECT_*
        # fields for us.
        'SYSLOG_IDENTIFIER': 'tcpstates',
        'PRIORITY': 5,
        '_SOURCE_REALTIME_TIMESTAMP': time() * 1000000,
        'OBJECT_PID': str(event.pid),
        'OBJECT_COMM': event.task.decode('utf-8', 'replace'),
        # Custom fields, aka "stuff we sort of made up".
        src_key: inet_ntop(addr_family, event.saddr),
        'OBJECT_TCP_SOURCE_PORT': str(event.lport),
        dst_key: inet_ntop(addr_family, event.daddr),
        'OBJECT_TCP_DESTINATION_PORT': str(event.dport),
        'OBJECT_TCP_OLD_STATE': tcpstate2str(event.oldstate),
        'OBJECT_TCP_NEW_STATE': tcpstate2str(event.newstate),
        'OBJECT_TCP_SPAN_TIME': str(event.span_us)
    }

    # MESSAGE is rendered from the fields above, so build it before any
    # entry is removed.
    template = u"".join([
        u"%(OBJECT_COMM)s ",
        u"%(" + src_key + u")s ",
        u"%(OBJECT_TCP_SOURCE_PORT)s → ",
        u"%(" + dst_key + u")s ",
        u"%(OBJECT_TCP_DESTINATION_PORT)s ",
        u"%(OBJECT_TCP_OLD_STATE)s → %(OBJECT_TCP_NEW_STATE)s",
    ])
    fields['MESSAGE'] = template % fields

    if getuid() == 0:
        del fields['OBJECT_COMM']  # Handled by systemd-journald

    return fields


# process event
def print_event(event, addr_family):
    """Print one state-change row and, with -Y, log it to the journal."""
    global start_ts
    if args.time:
        clock_fmt = "%s," if args.csv else "%-8s "
        print(clock_fmt % strftime("%H:%M:%S"), end="")
    if args.timestamp:
        # Timestamps are shown relative to the first event printed.
        if start_ts == 0:
            start_ts = event.ts_us
        delta_s = (float(event.ts_us) - start_ts) / 1000000
        stamp_fmt = "%.6f," if args.csv else "%-9.6f "
        print(stamp_fmt % delta_s, end="")

    version = "4" if addr_family == AF_INET else "6"
    row = (
        event.skaddr,
        event.pid,
        event.task.decode('utf-8', 'replace'),
        # The IP version column exists only in the wide and CSV layouts.
        version if args.wide or args.csv else "",
        inet_ntop(addr_family, event.saddr),
        event.lport,
        inet_ntop(addr_family, event.daddr),
        event.dport,
        tcpstate2str(event.oldstate),
        tcpstate2str(event.newstate),
        # span_us is in microseconds; the MS column is in milliseconds.
        float(event.span_us) / 1000,
    )
    print(format_string % row)
    if args.journal:
        journal.send(**journal_fields(event, addr_family))


def print_ipv4_event(cpu, data, size):
    """Perf-buffer callback for the ipv4_events output."""
    print_event(b["ipv4_events"].event(data), AF_INET)


def print_ipv6_event(cpu, data, size):
    """Perf-buffer callback for the ipv6_events output."""
    print_event(b["ipv6_events"].event(data), AF_INET6)


# initialize BPF
b = BPF(text=bpf_text)

# header
if args.time:
    print(("%s," if args.csv else "%-8s ") % ("TIME"), end="")
if args.timestamp:
    print(("%s," if args.csv else "%-9s ") % ("TIME(s)"), end="")
column_titles = (
    "SKADDR", "C-PID", "C-COMM",
    "IP" if args.wide or args.csv else "",
    "LADDR", "LPORT", "RADDR", "RPORT",
    "OLDSTATE", "NEWSTATE", "MS",
)
print(header_string % column_titles)

# ts_us of the first event printed; 0 until print_event() sets it.
start_ts = 0

# read events
b["ipv4_events"].open_perf_buffer(print_ipv4_event, page_cnt=64)
b["ipv6_events"].open_perf_buffer(print_ipv6_event, page_cnt=64)
while True:
    try:
        b.perf_buffer_poll()
    except KeyboardInterrupt:
        exit()