1#!/usr/bin/env python 2# @lint-avoid-python-3-compatibility-imports 3# 4# tcpdrop Trace TCP kernel-dropped packets/segments. 5# For Linux, uses BCC, eBPF. Embedded C. 6# 7# This provides information such as packet details, socket state, and kernel 8# stack trace for packets/segments that were dropped via tcp_drop(). 9# 10# USAGE: tcpdrop [-4 | -6] [-h] 11# 12# This uses dynamic tracing of kernel functions, and will need to be updated 13# to match kernel changes. 14# 15# Copyright 2018 Netflix, Inc. 16# Licensed under the Apache License, Version 2.0 (the "License") 17# 18# 30-May-2018 Brendan Gregg Created this. 19# 15-Jun-2022 Rong Tao Add tracepoint:skb:kfree_skb 20 21from __future__ import print_function 22from bcc import BPF 23import argparse 24from time import strftime 25from socket import inet_ntop, AF_INET, AF_INET6 26from struct import pack 27from time import sleep 28from bcc import tcp 29 30# arguments 31examples = """examples: 32 ./tcpdrop # trace kernel TCP drops 33 ./tcpdrop -4 # trace IPv4 family only 34 ./tcpdrop -6 # trace IPv6 family only 35""" 36parser = argparse.ArgumentParser( 37 description="Trace TCP drops by the kernel", 38 formatter_class=argparse.RawDescriptionHelpFormatter, 39 epilog=examples) 40group = parser.add_mutually_exclusive_group() 41group.add_argument("-4", "--ipv4", action="store_true", 42 help="trace IPv4 family only") 43group.add_argument("-6", "--ipv6", action="store_true", 44 help="trace IPv6 family only") 45parser.add_argument("--ebpf", action="store_true", 46 help=argparse.SUPPRESS) 47args = parser.parse_args() 48debug = 0 49 50# define BPF program 51bpf_text = """ 52#include <uapi/linux/ptrace.h> 53#include <uapi/linux/tcp.h> 54#include <uapi/linux/ip.h> 55#include <net/sock.h> 56#include <bcc/proto.h> 57 58BPF_STACK_TRACE(stack_traces, 1024); 59 60// separate data structs for ipv4 and ipv6 61struct ipv4_data_t { 62 u32 pid; 63 u64 ip; 64 u32 saddr; 65 u32 daddr; 66 u16 sport; 67 u16 dport; 68 u8 state; 69 u8 tcpflags; 70 u32 stack_id; 71}; 72BPF_PERF_OUTPUT(ipv4_events); 73 74struct ipv6_data_t { 75 u32 pid; 76 u64 ip; 77 unsigned __int128 saddr; 78 unsigned __int128 daddr; 79 u16 sport; 80 u16 dport; 81 u8 state; 82 u8 tcpflags; 83 u32 stack_id; 84}; 85BPF_PERF_OUTPUT(ipv6_events); 86 87static struct tcphdr *skb_to_tcphdr(const struct sk_buff *skb) 88{ 89 // unstable API. verify logic in tcp_hdr() -> skb_transport_header(). 90 return (struct tcphdr *)(skb->head + skb->transport_header); 91} 92 93static inline struct iphdr *skb_to_iphdr(const struct sk_buff *skb) 94{ 95 // unstable API. verify logic in ip_hdr() -> skb_network_header(). 96 return (struct iphdr *)(skb->head + skb->network_header); 97} 98 99// from include/net/tcp.h: 100#ifndef tcp_flag_byte 101#define tcp_flag_byte(th) (((u_int8_t *)th)[13]) 102#endif 103 104static int __trace_tcp_drop(void *ctx, struct sock *sk, struct sk_buff *skb) 105{ 106 if (sk == NULL) 107 return 0; 108 u32 pid = bpf_get_current_pid_tgid() >> 32; 109 110 // pull in details from the packet headers and the sock struct 111 u16 family = sk->__sk_common.skc_family; 112 char state = sk->__sk_common.skc_state; 113 u16 sport = 0, dport = 0; 114 struct tcphdr *tcp = skb_to_tcphdr(skb); 115 struct iphdr *ip = skb_to_iphdr(skb); 116 u8 tcpflags = ((u_int8_t *)tcp)[13]; 117 sport = tcp->source; 118 dport = tcp->dest; 119 sport = ntohs(sport); 120 dport = ntohs(dport); 121 122 FILTER_FAMILY 123 124 if (family == AF_INET) { 125 struct ipv4_data_t data4 = {}; 126 data4.pid = pid; 127 data4.ip = 4; 128 data4.saddr = ip->saddr; 129 data4.daddr = ip->daddr; 130 data4.dport = dport; 131 data4.sport = sport; 132 data4.state = state; 133 data4.tcpflags = tcpflags; 134 data4.stack_id = stack_traces.get_stackid(ctx, 0); 135 ipv4_events.perf_submit(ctx, &data4, sizeof(data4)); 136 137 } else if (family == AF_INET6) { 138 struct ipv6_data_t data6 = {}; 139 data6.pid = pid; 140 data6.ip = 6; 141 // The remote address (skc_v6_daddr) was the source 142 bpf_probe_read_kernel(&data6.saddr, sizeof(data6.saddr), 143 sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32); 144 // The local address (skc_v6_rcv_saddr) was the destination 145 bpf_probe_read_kernel(&data6.daddr, sizeof(data6.daddr), 146 sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32); 147 data6.dport = dport; 148 data6.sport = sport; 149 data6.state = state; 150 data6.tcpflags = tcpflags; 151 data6.stack_id = stack_traces.get_stackid(ctx, 0); 152 ipv6_events.perf_submit(ctx, &data6, sizeof(data6)); 153 } 154 // else drop 155 156 return 0; 157} 158 159int trace_tcp_drop(struct pt_regs *ctx, struct sock *sk, struct sk_buff *skb) 160{ 161 return __trace_tcp_drop(ctx, sk, skb); 162} 163""" 164 165bpf_kfree_skb_text = """ 166#include <linux/skbuff.h> 167 168TRACEPOINT_PROBE(skb, kfree_skb) { 169 struct sk_buff *skb = args->skbaddr; 170 struct sock *sk = skb->sk; 171 enum skb_drop_reason reason = args->reason; 172 173 // SKB_NOT_DROPPED_YET, 174 // SKB_DROP_REASON_NOT_SPECIFIED, 175 if (reason > SKB_DROP_REASON_NOT_SPECIFIED) { 176 return __trace_tcp_drop(args, sk, skb); 177 } 178 179 return 0; 180} 181""" 182 183if debug or args.ebpf: 184 print(bpf_text) 185 if args.ebpf: 186 exit() 187if args.ipv4: 188 bpf_text = bpf_text.replace('FILTER_FAMILY', 189 'if (family != AF_INET) { return 0; }') 190elif args.ipv6: 191 bpf_text = bpf_text.replace('FILTER_FAMILY', 192 'if (family != AF_INET6) { return 0; }') 193else: 194 bpf_text = bpf_text.replace('FILTER_FAMILY', '') 195 196# process event 197def print_ipv4_event(cpu, data, size): 198 event = b["ipv4_events"].event(data) 199 print("%-8s %-7d %-2d %-20s > %-20s %s (%s)" % ( 200 strftime("%H:%M:%S"), event.pid, event.ip, 201 "%s:%d" % (inet_ntop(AF_INET, pack('I', event.saddr)), event.sport), 202 "%s:%s" % (inet_ntop(AF_INET, pack('I', event.daddr)), event.dport), 203 tcp.tcpstate[event.state], tcp.flags2str(event.tcpflags))) 204 for addr in stack_traces.walk(event.stack_id): 205 sym = b.ksym(addr, show_offset=True) 206 print("\t%s" % sym) 207 print("") 208 209def print_ipv6_event(cpu, data, size): 210 event = b["ipv6_events"].event(data) 211 print("%-8s %-7d %-2d %-20s > %-20s %s (%s)" % ( 212 strftime("%H:%M:%S"), event.pid, event.ip, 213 "%s:%d" % (inet_ntop(AF_INET6, event.saddr), event.sport), 214 "%s:%d" % (inet_ntop(AF_INET6, event.daddr), event.dport), 215 tcp.tcpstate[event.state], tcp.flags2str(event.tcpflags))) 216 for addr in stack_traces.walk(event.stack_id): 217 sym = b.ksym(addr, show_offset=True) 218 print("\t%s" % sym) 219 print("") 220 221if BPF.tracepoint_exists("skb", "kfree_skb"): 222 if BPF.kernel_struct_has_field("trace_event_raw_kfree_skb", "reason") == 1: 223 bpf_text += bpf_kfree_skb_text 224 225# initialize BPF 226b = BPF(text=bpf_text) 227 228if b.get_kprobe_functions(b"tcp_drop"): 229 b.attach_kprobe(event="tcp_drop", fn_name="trace_tcp_drop") 230elif b.tracepoint_exists("skb", "kfree_skb"): 231 print("WARNING: tcp_drop() kernel function not found or traceable. " 232 "Use tracpoint:skb:kfree_skb instead.") 233else: 234 print("ERROR: tcp_drop() kernel function and tracpoint:skb:kfree_skb" 235 " not found or traceable. " 236 "The kernel might be too old or the the function has been inlined.") 237 exit() 238stack_traces = b.get_table("stack_traces") 239 240# header 241print("%-8s %-7s %-2s %-20s > %-20s %s (%s)" % ("TIME", "PID", "IP", 242 "SADDR:SPORT", "DADDR:DPORT", "STATE", "FLAGS")) 243 244# read events 245b["ipv4_events"].open_perf_buffer(print_ipv4_event) 246b["ipv6_events"].open_perf_buffer(print_ipv6_event) 247while 1: 248 try: 249 b.perf_buffer_poll() 250 except KeyboardInterrupt: 251 exit() 252