1#!/usr/bin/env python 2# 3# bindsnoop Trace IPv4 and IPv6 binds()s. 4# For Linux, uses BCC, eBPF. Embedded C. 5# 6# based on tcpconnect utility from Brendan Gregg's suite. 7# 8# USAGE: bindsnoop [-h] [-t] [-E] [-p PID] [-P PORT[,PORT ...]] [-w] 9# [--count] [--cgroupmap mappath] [--mntnsmap mappath] 10# 11# bindsnoop reports socket options set before the bind call 12# that would impact this system call behavior: 13# SOL_IP IP_FREEBIND F.... 14# SOL_IP IP_TRANSPARENT .T... 15# SOL_IP IP_BIND_ADDRESS_NO_PORT ..N.. 16# SOL_SOCKET SO_REUSEADDR ...R. 17# SOL_SOCKET SO_REUSEPORT ....r 18# 19# SO_BINDTODEVICE interface is reported as "BOUND_IF" index 20# 21# This uses dynamic tracing of kernel functions, and will need to be updated 22# to match kernel changes. 23# 24# Copyright (c) 2020-present Facebook. 25# Licensed under the Apache License, Version 2.0 (the "License") 26# 27# 14-Feb-2020 Pavel Dubovitsky Created this. 28 29from __future__ import print_function, absolute_import, unicode_literals 30from bcc import BPF 31from bcc.containers import filter_by_containers 32from bcc.utils import printb 33import argparse 34import re 35from os import strerror 36from socket import ( 37 inet_ntop, AF_INET, AF_INET6, __all__ as socket_all, __dict__ as socket_dct 38) 39from struct import pack 40from time import sleep 41 42# arguments 43examples = """examples: 44 ./bindsnoop # trace all TCP bind()s 45 ./bindsnoop -t # include timestamps 46 ./bindsnoop -w # wider columns (fit IPv6) 47 ./bindsnoop -p 181 # only trace PID 181 48 ./bindsnoop -P 80 # only trace port 80 49 ./bindsnoop -P 80,81 # only trace port 80 and 81 50 ./bindsnoop -U # include UID 51 ./bindsnoop -u 1000 # only trace UID 1000 52 ./bindsnoop -E # report bind errors 53 ./bindsnoop --count # count bind per src ip 54 ./bindsnoop --cgroupmap mappath # only trace cgroups in this BPF map 55 ./bindsnoop --mntnsmap mappath # only trace mount namespaces in the map 56 57it is reporting socket options set before the bins call 58impacting system call behavior: 59 SOL_IP IP_FREEBIND F.... 60 SOL_IP IP_TRANSPARENT .T... 61 SOL_IP IP_BIND_ADDRESS_NO_PORT ..N.. 62 SOL_SOCKET SO_REUSEADDR ...R. 63 SOL_SOCKET SO_REUSEPORT ....r 64 65 SO_BINDTODEVICE interface is reported as "IF" index 66""" 67parser = argparse.ArgumentParser( 68 description="Trace TCP binds", 69 formatter_class=argparse.RawDescriptionHelpFormatter, 70 epilog=examples) 71parser.add_argument("-t", "--timestamp", action="store_true", 72 help="include timestamp on output") 73parser.add_argument("-w", "--wide", action="store_true", 74 help="wide column output (fits IPv6 addresses)") 75parser.add_argument("-p", "--pid", 76 help="trace this PID only") 77parser.add_argument("-P", "--port", 78 help="comma-separated list of ports to trace.") 79parser.add_argument("-E", "--errors", action="store_true", 80 help="include errors in the output.") 81parser.add_argument("-U", "--print-uid", action="store_true", 82 help="include UID on output") 83parser.add_argument("-u", "--uid", 84 help="trace this UID only") 85parser.add_argument("--count", action="store_true", 86 help="count binds per src ip and port") 87parser.add_argument("--cgroupmap", 88 help="trace cgroups in this BPF map only") 89parser.add_argument("--mntnsmap", 90 help="trace mount namespaces in this BPF map only") 91parser.add_argument("--ebpf", action="store_true", 92 help=argparse.SUPPRESS) 93parser.add_argument("--debug-source", action="store_true", 94 help=argparse.SUPPRESS) 95args = parser.parse_args() 96 97# define BPF program 98bpf_text = """ 99#include <uapi/linux/ptrace.h> 100#pragma clang diagnostic push 101#pragma clang diagnostic ignored "-Wtautological-compare" 102#include <net/sock.h> 103#pragma clang diagnostic pop 104#include <net/inet_sock.h> 105#include <net/net_namespace.h> 106#include <bcc/proto.h> 107 108BPF_HASH(currsock, u32, struct socket *); 109 110// separate data structs for ipv4 and ipv6 111struct ipv4_bind_data_t { 112 u64 ts_us; 113 u32 pid; 114 u32 uid; 115 u64 ip; 116 u32 saddr; 117 u32 bound_dev_if; 118 int return_code; 119 u16 sport; 120 u8 socket_options; 121 u8 protocol; 122 char task[TASK_COMM_LEN]; 123}; 124BPF_PERF_OUTPUT(ipv4_bind_events); 125 126struct ipv6_bind_data_t { 127 // int128 would be aligned on 16 bytes boundary, better to go first 128 unsigned __int128 saddr; 129 u64 ts_us; 130 u32 pid; 131 u32 uid; 132 u64 ip; 133 u32 bound_dev_if; 134 int return_code; 135 u16 sport; 136 u8 socket_options; 137 u8 protocol; 138 char task[TASK_COMM_LEN]; 139}; 140BPF_PERF_OUTPUT(ipv6_bind_events); 141 142// separate flow keys per address family 143struct ipv4_flow_key_t { 144 u32 saddr; 145 u16 sport; 146}; 147BPF_HASH(ipv4_count, struct ipv4_flow_key_t); 148 149struct ipv6_flow_key_t { 150 unsigned __int128 saddr; 151 u16 sport; 152}; 153BPF_HASH(ipv6_count, struct ipv6_flow_key_t); 154 155// bind options for event reporting 156union bind_options { 157 u8 data; 158 struct { 159 u8 freebind:1; 160 u8 transparent:1; 161 u8 bind_address_no_port:1; 162 u8 reuseaddress:1; 163 u8 reuseport:1; 164 } fields; 165}; 166 167// TODO: add reporting for the original bind arguments 168int bindsnoop_entry(struct pt_regs *ctx, struct socket *socket) 169{ 170 u64 pid_tgid = bpf_get_current_pid_tgid(); 171 u32 pid = pid_tgid >> 32; 172 u32 tid = pid_tgid; 173 FILTER_PID 174 175 u32 uid = bpf_get_current_uid_gid(); 176 177 FILTER_UID 178 179 if (container_should_be_filtered()) { 180 return 0; 181 } 182 183 // stash the sock ptr for lookup on return 184 currsock.update(&tid, &socket); 185 186 return 0; 187}; 188 189 190static int bindsnoop_return(struct pt_regs *ctx, short ipver) 191{ 192 int ret = PT_REGS_RC(ctx); 193 u64 pid_tgid = bpf_get_current_pid_tgid(); 194 u32 pid = pid_tgid >> 32; 195 u32 tid = pid_tgid; 196 197 struct socket **skpp; 198 skpp = currsock.lookup(&tid); 199 if (skpp == 0) { 200 return 0; // missed entry 201 } 202 203 int ignore_errors = 1; 204 FILTER_ERRORS 205 if (ret != 0 && ignore_errors) { 206 // failed to bind 207 currsock.delete(&tid); 208 return 0; 209 } 210 211 // pull in details 212 struct socket *skp_ = *skpp; 213 struct sock *skp = skp_->sk; 214 215 struct inet_sock *sockp = (struct inet_sock *)skp; 216 217 u16 sport = 0; 218 bpf_probe_read_kernel(&sport, sizeof(sport), &sockp->inet_sport); 219 sport = ntohs(sport); 220 221 FILTER_PORT 222 223 union bind_options opts = {0}; 224 u8 bitfield; 225 // fetching freebind, transparent, and bind_address_no_port bitfields 226 // via the next struct member, rcv_tos 227 bitfield = (u8) *(&sockp->rcv_tos - 2) & 0xFF; 228 // IP_FREEBIND (sockp->freebind) 229 opts.fields.freebind = bitfield >> 2 & 0x01; 230 // IP_TRANSPARENT (sockp->transparent) 231 opts.fields.transparent = bitfield >> 5 & 0x01; 232 // IP_BIND_ADDRESS_NO_PORT (sockp->bind_address_no_port) 233 opts.fields.bind_address_no_port = *(&sockp->rcv_tos - 1) & 0x01; 234 235 // SO_REUSEADDR and SO_REUSEPORT are bitfields that 236 // cannot be accessed directly, fetched via the next struct member, 237 // __sk_common.skc_bound_dev_if 238 bitfield = *((u8*)&skp->__sk_common.skc_bound_dev_if - 1); 239 // SO_REUSEADDR (skp->reuse) 240 // it is 4 bit, but we are interested in the lowest one 241 opts.fields.reuseaddress = bitfield & 0x0F; 242 // SO_REUSEPORT (skp->reuseport) 243 opts.fields.reuseport = bitfield >> 4 & 0x01; 244 245 // workaround for reading the sk_protocol bitfield (from tcpaccept.py): 246 u16 protocol; 247 int gso_max_segs_offset = offsetof(struct sock, sk_gso_max_segs); 248 int sk_lingertime_offset = offsetof(struct sock, sk_lingertime); 249 250 // Since kernel v5.6 sk_protocol has its own u16 field 251 if (sk_lingertime_offset - gso_max_segs_offset == 2) 252 protocol = skp->sk_protocol; 253 else if (sk_lingertime_offset - gso_max_segs_offset == 4) 254 // 4.10+ with little endian 255#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 256 protocol = *(u8 *)((u64)&skp->sk_gso_max_segs - 3); 257 else 258 // pre-4.10 with little endian 259 protocol = *(u8 *)((u64)&skp->sk_wmem_queued - 3); 260#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 261 // 4.10+ with big endian 262 protocol = *(u8 *)((u64)&skp->sk_gso_max_segs - 1); 263 else 264 // pre-4.10 with big endian 265 protocol = *(u8 *)((u64)&skp->sk_wmem_queued - 1); 266#else 267# error "Fix your compiler's __BYTE_ORDER__?!" 268#endif 269 270 if (ipver == 4) { 271 IPV4_CODE 272 } else /* 6 */ { 273 IPV6_CODE 274 } 275 276 currsock.delete(&tid); 277 278 return 0; 279} 280 281int bindsnoop_v4_return(struct pt_regs *ctx) 282{ 283 return bindsnoop_return(ctx, 4); 284} 285 286int bindsnoop_v6_return(struct pt_regs *ctx) 287{ 288 return bindsnoop_return(ctx, 6); 289} 290""" 291 292struct_init = { 293 'ipv4': { 294 'count': """ 295 struct ipv4_flow_key_t flow_key = {}; 296 flow_key.saddr = skp->__sk_common.skc_rcv_saddr; 297 flow_key.sport = sport; 298 ipv4_count.increment(flow_key);""", 299 'trace': """ 300 struct ipv4_bind_data_t data4 = {.pid = pid, .ip = ipver}; 301 data4.uid = bpf_get_current_uid_gid(); 302 data4.ts_us = bpf_ktime_get_ns() / 1000; 303 bpf_probe_read_kernel( 304 &data4.saddr, sizeof(data4.saddr), &sockp->inet_saddr); 305 data4.return_code = ret; 306 data4.sport = sport; 307 data4.bound_dev_if = skp->__sk_common.skc_bound_dev_if; 308 data4.socket_options = opts.data; 309 data4.protocol = protocol; 310 bpf_get_current_comm(&data4.task, sizeof(data4.task)); 311 ipv4_bind_events.perf_submit(ctx, &data4, sizeof(data4));""" 312 }, 313 'ipv6': { 314 'count': """ 315 struct ipv6_flow_key_t flow_key = {}; 316 bpf_probe_read_kernel(&flow_key.saddr, sizeof(flow_key.saddr), 317 skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32); 318 flow_key.sport = sport; 319 ipv6_count.increment(flow_key);""", 320 'trace': """ 321 struct ipv6_bind_data_t data6 = {.pid = pid, .ip = ipver}; 322 data6.uid = bpf_get_current_uid_gid(); 323 data6.ts_us = bpf_ktime_get_ns() / 1000; 324 bpf_probe_read_kernel(&data6.saddr, sizeof(data6.saddr), 325 skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32); 326 data6.return_code = ret; 327 data6.sport = sport; 328 data6.bound_dev_if = skp->__sk_common.skc_bound_dev_if; 329 data6.socket_options = opts.data; 330 data6.protocol = protocol; 331 bpf_get_current_comm(&data6.task, sizeof(data6.task)); 332 ipv6_bind_events.perf_submit(ctx, &data6, sizeof(data6));""" 333 }, 334} 335 336# code substitutions 337if args.count: 338 bpf_text = bpf_text.replace("IPV4_CODE", struct_init['ipv4']['count']) 339 bpf_text = bpf_text.replace("IPV6_CODE", struct_init['ipv6']['count']) 340else: 341 bpf_text = bpf_text.replace("IPV4_CODE", struct_init['ipv4']['trace']) 342 bpf_text = bpf_text.replace("IPV6_CODE", struct_init['ipv6']['trace']) 343 344if args.pid: 345 bpf_text = bpf_text.replace('FILTER_PID', 346 'if (pid != %s) { return 0; }' % args.pid) 347if args.port: 348 sports = [int(sport) for sport in args.port.split(',')] 349 sports_if = ' && '.join(['sport != %d' % sport for sport in sports]) 350 bpf_text = bpf_text.replace('FILTER_PORT', 351 'if (%s) { currsock.delete(&tid); return 0; }' % sports_if) 352if args.uid: 353 bpf_text = bpf_text.replace('FILTER_UID', 354 'if (uid != %s) { return 0; }' % args.uid) 355if args.errors: 356 bpf_text = bpf_text.replace('FILTER_ERRORS', 'ignore_errors = 0;') 357bpf_text = filter_by_containers(args) + bpf_text 358bpf_text = bpf_text.replace('FILTER_PID', '') 359bpf_text = bpf_text.replace('FILTER_PORT', '') 360bpf_text = bpf_text.replace('FILTER_UID', '') 361bpf_text = bpf_text.replace('FILTER_ERRORS', '') 362 363# selecting output format - 80 characters or wide, fitting IPv6 addresses 364header_fmt = "%8s %-12.12s %-4s %-15s %-5s %5s %2s" 365output_fmt = b"%8d %-12.12s %-4.4s %-15.15s %5d %-5s %2d" 366error_header_fmt = "%3s " 367error_output_fmt = b"%3s " 368error_value_fmt = str 369if args.wide: 370 header_fmt = "%10s %-12.12s %-4s %-39s %-5s %5s %2s" 371 output_fmt = b"%10d %-12.12s %-4s %-39s %5d %-5s %2d" 372 error_header_fmt = "%-25s " 373 error_output_fmt = b"%-25s " 374 error_value_fmt = strerror 375 376if args.ebpf: 377 print(bpf_text) 378 exit() 379 380# L4 protocol resolver 381class L4Proto: 382 def __init__(self): 383 self.num2str = {} 384 proto_re = re.compile("IPPROTO_(.*)") 385 for attr in socket_all: 386 proto_match = proto_re.match(attr) 387 if proto_match: 388 self.num2str[socket_dct[attr]] = proto_match.group(1) 389 390 def proto2str(self, proto): 391 return self.num2str.get(proto, "UNKNOWN") 392 393l4 = L4Proto() 394 395# bind options: 396# SOL_IP IP_FREEBIND F.... 397# SOL_IP IP_TRANSPARENT .T... 398# SOL_IP IP_BIND_ADDRESS_NO_PORT ..N.. 399# SOL_SOCKET SO_REUSEADDR ...R. 400# SOL_SOCKET SO_REUSEPORT ....r 401def opts2str(bitfield): 402 str_options = "" 403 bit = 1 404 for opt in "FTNRr": 405 str_options += opt if bitfield & bit else "." 406 bit *= 2 407 return str_options.encode() 408 409 410# process events 411def print_ipv4_bind_event(cpu, data, size): 412 event = b["ipv4_bind_events"].event(data) 413 global start_ts 414 if args.timestamp: 415 if start_ts == 0: 416 start_ts = event.ts_us 417 printb(b"%-9.6f " % ((float(event.ts_us) - start_ts) / 1000000), nl="") 418 if args.print_uid: 419 printb(b"%6d " % event.uid, nl="") 420 if args.errors: 421 printb( 422 error_output_fmt % error_value_fmt(event.return_code).encode(), 423 nl="", 424 ) 425 printb(output_fmt % (event.pid, event.task, 426 l4.proto2str(event.protocol).encode(), 427 inet_ntop(AF_INET, pack("I", event.saddr)).encode(), 428 event.sport, opts2str(event.socket_options), event.bound_dev_if)) 429 430 431def print_ipv6_bind_event(cpu, data, size): 432 event = b["ipv6_bind_events"].event(data) 433 global start_ts 434 if args.timestamp: 435 if start_ts == 0: 436 start_ts = event.ts_us 437 printb(b"%-9.6f " % ((float(event.ts_us) - start_ts) / 1000000), nl="") 438 if args.print_uid: 439 printb(b"%6d " % event.uid, nl="") 440 if args.errors: 441 printb( 442 error_output_fmt % error_value_fmt(event.return_code).encode(), 443 nl="", 444 ) 445 printb(output_fmt % (event.pid, event.task, 446 l4.proto2str(event.protocol).encode(), 447 inet_ntop(AF_INET6, event.saddr).encode(), 448 event.sport, opts2str(event.socket_options), event.bound_dev_if)) 449 450 451def depict_cnt(counts_tab, l3prot='ipv4'): 452 for k, v in sorted( 453 counts_tab.items(), key=lambda counts: counts[1].value, reverse=True 454 ): 455 depict_key = "" 456 if l3prot == 'ipv4': 457 depict_key = "%-32s %20s" % ( 458 (inet_ntop(AF_INET, pack('I', k.saddr))), k.sport 459 ) 460 else: 461 depict_key = "%-32s %20s" % ( 462 (inet_ntop(AF_INET6, k.saddr)), k.sport 463 ) 464 print("%s %-10d" % (depict_key, v.value)) 465 466 467# initialize BPF 468b = BPF(text=bpf_text) 469b.attach_kprobe(event="inet_bind", fn_name="bindsnoop_entry") 470b.attach_kprobe(event="inet6_bind", fn_name="bindsnoop_entry") 471b.attach_kretprobe(event="inet_bind", fn_name="bindsnoop_v4_return") 472b.attach_kretprobe(event="inet6_bind", fn_name="bindsnoop_v6_return") 473 474print("Tracing binds ... Hit Ctrl-C to end") 475if args.count: 476 try: 477 while 1: 478 sleep(99999999) 479 except KeyboardInterrupt: 480 pass 481 482 # header 483 print("\n%-32s %20s %-10s" % ( 484 "LADDR", "LPORT", "BINDS")) 485 depict_cnt(b["ipv4_count"]) 486 depict_cnt(b["ipv6_count"], l3prot='ipv6') 487# read events 488else: 489 # header 490 if args.timestamp: 491 print("%-9s " % ("TIME(s)"), end="") 492 if args.print_uid: 493 print("%6s " % ("UID"), end="") 494 if args.errors: 495 print(error_header_fmt % ("RC"), end="") 496 print(header_fmt % ("PID", "COMM", "PROT", "ADDR", "PORT", "OPTS", "IF")) 497 498 start_ts = 0 499 500 # read events 501 b["ipv4_bind_events"].open_perf_buffer(print_ipv4_bind_event) 502 b["ipv6_bind_events"].open_perf_buffer(print_ipv6_bind_event) 503 while 1: 504 try: 505 b.perf_buffer_poll() 506 except KeyboardInterrupt: 507 exit() 508