xref: /aosp_15_r20/external/bcc/tools/bindsnoop.py (revision 387f9dfdfa2baef462e92476d413c7bc2470293e)
1#!/usr/bin/env python
2#
# bindsnoop       Trace IPv4 and IPv6 bind()s.
4#               For Linux, uses BCC, eBPF. Embedded C.
5#
6# based on tcpconnect utility from Brendan Gregg's suite.
7#
# USAGE: bindsnoop [-h] [-t] [-E] [-p PID] [-P PORT[,PORT ...]] [-w] [-U]
#             [-u UID] [--count] [--cgroupmap mappath] [--mntnsmap mappath]
10#
11# bindsnoop reports socket options set before the bind call
12# that would impact this system call behavior:
13# SOL_IP     IP_FREEBIND              F....
14# SOL_IP     IP_TRANSPARENT           .T...
15# SOL_IP     IP_BIND_ADDRESS_NO_PORT  ..N..
16# SOL_SOCKET SO_REUSEADDR             ...R.
17# SOL_SOCKET SO_REUSEPORT             ....r
18#
# SO_BINDTODEVICE interface is reported as "IF" index
20#
21# This uses dynamic tracing of kernel functions, and will need to be updated
22# to match kernel changes.
23#
24# Copyright (c) 2020-present Facebook.
25# Licensed under the Apache License, Version 2.0 (the "License")
26#
27# 14-Feb-2020   Pavel Dubovitsky   Created this.
28
29from __future__ import print_function, absolute_import, unicode_literals
30from bcc import BPF
31from bcc.containers import filter_by_containers
32from bcc.utils import printb
33import argparse
34import re
35from os import strerror
36from socket import (
37    inet_ntop, AF_INET, AF_INET6, __all__ as socket_all, __dict__ as socket_dct
38)
39from struct import pack
40from time import sleep
41
# arguments
# Epilog printed by --help: usage examples followed by the legend for the
# socket-option flags and the interface index shown in the output.
examples = """examples:
    ./bindsnoop           # trace all TCP bind()s
    ./bindsnoop -t        # include timestamps
    ./bindsnoop -w        # wider columns (fit IPv6)
    ./bindsnoop -p 181    # only trace PID 181
    ./bindsnoop -P 80     # only trace port 80
    ./bindsnoop -P 80,81  # only trace port 80 and 81
    ./bindsnoop -U        # include UID
    ./bindsnoop -u 1000   # only trace UID 1000
    ./bindsnoop -E        # report bind errors
    ./bindsnoop --count   # count bind per src ip
    ./bindsnoop --cgroupmap mappath  # only trace cgroups in this BPF map
    ./bindsnoop --mntnsmap  mappath  # only trace mount namespaces in the map

it is reporting socket options set before the bind call
impacting system call behavior:
 SOL_IP     IP_FREEBIND              F....
 SOL_IP     IP_TRANSPARENT           .T...
 SOL_IP     IP_BIND_ADDRESS_NO_PORT  ..N..
 SOL_SOCKET SO_REUSEADDR             ...R.
 SOL_SOCKET SO_REUSEPORT             ....r

 SO_BINDTODEVICE interface is reported as "IF" index
"""
# RawDescriptionHelpFormatter keeps the hand-aligned epilog above intact.
parser = argparse.ArgumentParser(
    description="Trace TCP binds",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-t", "--timestamp", action="store_true",
    help="include timestamp on output")
parser.add_argument("-w", "--wide", action="store_true",
    help="wide column output (fits IPv6 addresses)")
# -p, -P and -u are kept as strings here; they are turned into C filter
# expressions when the BPF program text is assembled further down.
parser.add_argument("-p", "--pid",
    help="trace this PID only")
parser.add_argument("-P", "--port",
    help="comma-separated list of ports to trace.")
parser.add_argument("-E", "--errors", action="store_true",
    help="include errors in the output.")
parser.add_argument("-U", "--print-uid", action="store_true",
    help="include UID on output")
parser.add_argument("-u", "--uid",
    help="trace this UID only")
parser.add_argument("--count", action="store_true",
    help="count binds per src ip and port")
parser.add_argument("--cgroupmap",
    help="trace cgroups in this BPF map only")
parser.add_argument("--mntnsmap",
    help="trace mount namespaces in this BPF map only")
# Hidden switches (suppressed from --help): --ebpf prints the generated
# program text and exits.
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)
parser.add_argument("--debug-source", action="store_true",
    help=argparse.SUPPRESS)
args = parser.parse_args()
96
# define BPF program
#
# Template for the embedded C program. It is not valid C as written: the
# upper-case tokens FILTER_PID, FILTER_UID, FILTER_ERRORS, FILTER_PORT,
# IPV4_CODE and IPV6_CODE are placeholders that the "code substitutions"
# section of this script replaces (possibly with an empty string) before the
# text is handed to BPF().
#
# Flow of the program:
#   * bindsnoop_entry() stores the `struct socket *` argument in the
#     `currsock` hash, keyed by thread id.
#   * bindsnoop_v4_return()/bindsnoop_v6_return() call bindsnoop_return(),
#     which looks that pointer up again, reads port, socket options and
#     protocol, runs the IPV4_CODE/IPV6_CODE snippet and removes the entry.
#
# container_should_be_filtered() is not defined in this string; presumably it
# comes from the text that filter_by_containers(args) prepends - verify
# against bcc.containers.
bpf_text = """
#include <uapi/linux/ptrace.h>
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wtautological-compare"
#include <net/sock.h>
#pragma clang diagnostic pop
#include <net/inet_sock.h>
#include <net/net_namespace.h>
#include <bcc/proto.h>

BPF_HASH(currsock, u32, struct socket *);

// separate data structs for ipv4 and ipv6
struct ipv4_bind_data_t {
    u64 ts_us;
    u32 pid;
    u32 uid;
    u64 ip;
    u32 saddr;
    u32 bound_dev_if;
    int return_code;
    u16 sport;
    u8 socket_options;
    u8 protocol;
    char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(ipv4_bind_events);

struct ipv6_bind_data_t {
    // int128 would be aligned on 16 bytes boundary, better to go first
    unsigned __int128 saddr;
    u64 ts_us;
    u32 pid;
    u32 uid;
    u64 ip;
    u32 bound_dev_if;
    int return_code;
    u16 sport;
    u8 socket_options;
    u8 protocol;
    char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(ipv6_bind_events);

// separate flow keys per address family
struct ipv4_flow_key_t {
    u32 saddr;
    u16 sport;
};
BPF_HASH(ipv4_count, struct ipv4_flow_key_t);

struct ipv6_flow_key_t {
    unsigned __int128 saddr;
    u16 sport;
};
BPF_HASH(ipv6_count, struct ipv6_flow_key_t);

// bind options for event reporting
union bind_options {
    u8 data;
    struct {
        u8 freebind:1;
        u8 transparent:1;
        u8 bind_address_no_port:1;
        u8 reuseaddress:1;
        u8 reuseport:1;
    } fields;
};

// TODO: add reporting for the original bind arguments
int bindsnoop_entry(struct pt_regs *ctx, struct socket *socket)
{
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = pid_tgid;
    FILTER_PID

    u32 uid = bpf_get_current_uid_gid();

    FILTER_UID

    if (container_should_be_filtered()) {
        return 0;
    }

    // stash the sock ptr for lookup on return
    currsock.update(&tid, &socket);

    return 0;
};


static int bindsnoop_return(struct pt_regs *ctx, short ipver)
{
    int ret = PT_REGS_RC(ctx);
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = pid_tgid;

    struct socket **skpp;
    skpp = currsock.lookup(&tid);
    if (skpp == 0) {
        return 0;   // missed entry
    }

    int ignore_errors = 1;
    FILTER_ERRORS
    if (ret != 0 && ignore_errors) {
        // failed to bind
        currsock.delete(&tid);
        return 0;
    }

    // pull in details
    struct socket *skp_ = *skpp;
    struct sock *skp = skp_->sk;

    struct inet_sock *sockp = (struct inet_sock *)skp;

    u16 sport = 0;
    bpf_probe_read_kernel(&sport, sizeof(sport), &sockp->inet_sport);
    sport = ntohs(sport);

    FILTER_PORT

    union bind_options opts = {0};
    u8 bitfield;
    // fetching freebind, transparent, and bind_address_no_port bitfields
    // via the next struct member, rcv_tos
    bitfield = (u8) *(&sockp->rcv_tos - 2) & 0xFF;
    // IP_FREEBIND (sockp->freebind)
    opts.fields.freebind = bitfield >> 2 & 0x01;
    // IP_TRANSPARENT (sockp->transparent)
    opts.fields.transparent = bitfield >> 5 & 0x01;
    // IP_BIND_ADDRESS_NO_PORT (sockp->bind_address_no_port)
    opts.fields.bind_address_no_port = *(&sockp->rcv_tos - 1) & 0x01;

    // SO_REUSEADDR and SO_REUSEPORT are bitfields that
    // cannot be accessed directly, fetched via the next struct member,
    // __sk_common.skc_bound_dev_if
    bitfield = *((u8*)&skp->__sk_common.skc_bound_dev_if - 1);
    // SO_REUSEADDR (skp->reuse)
    // it is 4 bit, but we are interested in the lowest one
    opts.fields.reuseaddress = bitfield & 0x0F;
    // SO_REUSEPORT (skp->reuseport)
    opts.fields.reuseport = bitfield >> 4 & 0x01;

    // workaround for reading the sk_protocol bitfield (from tcpaccept.py):
    u16 protocol;
    int gso_max_segs_offset = offsetof(struct sock, sk_gso_max_segs);
    int sk_lingertime_offset = offsetof(struct sock, sk_lingertime);

    // Since kernel v5.6 sk_protocol has its own u16 field
    if (sk_lingertime_offset - gso_max_segs_offset == 2)
        protocol = skp->sk_protocol;
    else if (sk_lingertime_offset - gso_max_segs_offset == 4)
        // 4.10+ with little endian
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
        protocol = *(u8 *)((u64)&skp->sk_gso_max_segs - 3);
    else
        // pre-4.10 with little endian
        protocol = *(u8 *)((u64)&skp->sk_wmem_queued - 3);
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        // 4.10+ with big endian
        protocol = *(u8 *)((u64)&skp->sk_gso_max_segs - 1);
    else
        // pre-4.10 with big endian
        protocol = *(u8 *)((u64)&skp->sk_wmem_queued - 1);
#else
# error "Fix your compiler's __BYTE_ORDER__?!"
#endif

    if (ipver == 4) {
        IPV4_CODE
    } else /* 6 */ {
        IPV6_CODE
    }

    currsock.delete(&tid);

    return 0;
}

int bindsnoop_v4_return(struct pt_regs *ctx)
{
    return bindsnoop_return(ctx, 4);
}

int bindsnoop_v6_return(struct pt_regs *ctx)
{
    return bindsnoop_return(ctx, 6);
}
"""
291
# C snippets that replace the IPV4_CODE / IPV6_CODE placeholders inside
# bindsnoop_return() in bpf_text. For each address family there are two
# variants:
#   'count' - bump a per-(source address, source port) counter in the
#             ipv4_count / ipv6_count hash;
#   'trace' - fill an ipv4_bind_data_t / ipv6_bind_data_t record and submit
#             it through the matching perf output.
# The snippets are pasted into the function body, so they rely on locals
# declared there: ctx, ipver, pid, ret, skp, sockp, sport, opts, protocol.
struct_init = {
    'ipv4': {
        'count': """
               struct ipv4_flow_key_t flow_key = {};
               flow_key.saddr = skp->__sk_common.skc_rcv_saddr;
               flow_key.sport = sport;
               ipv4_count.increment(flow_key);""",
        'trace': """
               struct ipv4_bind_data_t data4 = {.pid = pid, .ip = ipver};
               data4.uid = bpf_get_current_uid_gid();
               data4.ts_us = bpf_ktime_get_ns() / 1000;
               bpf_probe_read_kernel(
                 &data4.saddr, sizeof(data4.saddr), &sockp->inet_saddr);
               data4.return_code = ret;
               data4.sport = sport;
               data4.bound_dev_if = skp->__sk_common.skc_bound_dev_if;
               data4.socket_options = opts.data;
               data4.protocol = protocol;
               bpf_get_current_comm(&data4.task, sizeof(data4.task));
               ipv4_bind_events.perf_submit(ctx, &data4, sizeof(data4));"""
    },
    'ipv6': {
        'count': """
               struct ipv6_flow_key_t flow_key = {};
               bpf_probe_read_kernel(&flow_key.saddr, sizeof(flow_key.saddr),
                   skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
               flow_key.sport = sport;
               ipv6_count.increment(flow_key);""",
        'trace': """
               struct ipv6_bind_data_t data6 = {.pid = pid, .ip = ipver};
               data6.uid = bpf_get_current_uid_gid();
               data6.ts_us = bpf_ktime_get_ns() / 1000;
               bpf_probe_read_kernel(&data6.saddr, sizeof(data6.saddr),
                   skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
               data6.return_code = ret;
               data6.sport = sport;
               data6.bound_dev_if = skp->__sk_common.skc_bound_dev_if;
               data6.socket_options = opts.data;
               data6.protocol = protocol;
               bpf_get_current_comm(&data6.task, sizeof(data6.task));
               ipv6_bind_events.perf_submit(ctx, &data6, sizeof(data6));"""
    },
}
335
# code substitutions
# Choose what bindsnoop_return() does per bind: aggregate counters with
# --count, otherwise emit one perf event per bind.
if args.count:
    bpf_text = bpf_text.replace("IPV4_CODE", struct_init['ipv4']['count'])
    bpf_text = bpf_text.replace("IPV6_CODE", struct_init['ipv6']['count'])
else:
    bpf_text = bpf_text.replace("IPV4_CODE", struct_init['ipv4']['trace'])
    bpf_text = bpf_text.replace("IPV6_CODE", struct_init['ipv6']['trace'])

# The -p/-u/-P values are spliced into the generated C source, so each one is
# converted with int() first: a non-numeric value fails here with a
# ValueError instead of being pasted verbatim into the program text.
if args.pid:
    bpf_text = bpf_text.replace('FILTER_PID',
        'if (pid != %d) { return 0; }' % int(args.pid))
if args.port:
    sports = [int(sport) for sport in args.port.split(',')]
    # Skip the event only when the port matches none of the requested ones.
    sports_if = ' && '.join(['sport != %d' % sport for sport in sports])
    bpf_text = bpf_text.replace('FILTER_PORT',
        'if (%s) { currsock.delete(&tid); return 0; }' % sports_if)
if args.uid:
    bpf_text = bpf_text.replace('FILTER_UID',
        'if (uid != %d) { return 0; }' % int(args.uid))
if args.errors:
    bpf_text = bpf_text.replace('FILTER_ERRORS', 'ignore_errors = 0;')
bpf_text = filter_by_containers(args) + bpf_text
# Any placeholder whose option was not given is blanked out so the final
# text contains no leftover placeholder tokens.
bpf_text = bpf_text.replace('FILTER_PID', '')
bpf_text = bpf_text.replace('FILTER_PORT', '')
bpf_text = bpf_text.replace('FILTER_UID', '')
bpf_text = bpf_text.replace('FILTER_ERRORS', '')
362
# selecting output format - 80 characters or wide, fitting IPv6 addresses
def _strerror_from_rc(return_code):
    """Return the errno message for a bind return code.

    The return code captured by the kretprobe is expected to follow the
    kernel convention of a negative errno on failure, whereas strerror() is
    indexed by the positive errno value; the sign is dropped so that a
    failure is shown by its message rather than as "Unknown error -N".
    """
    return strerror(abs(return_code))


header_fmt = "%8s %-12.12s %-4s %-15s %-5s %5s %2s"
output_fmt = b"%8d %-12.12s %-4.4s %-15.15s %5d %-5s %2d"
# Narrow mode prints the numeric return code as-is.
error_header_fmt = "%3s "
error_output_fmt = b"%3s "
error_value_fmt = str
if args.wide:
    header_fmt = "%10s %-12.12s %-4s %-39s %-5s %5s %2s"
    output_fmt = b"%10d %-12.12s %-4s %-39s %5d %-5s %2d"
    # Wide mode has room for the textual error description.
    error_header_fmt = "%-25s "
    error_output_fmt = b"%-25s "
    error_value_fmt = _strerror_from_rc

# --ebpf: dump the fully substituted program text and stop before any
# probe is attached.
if args.ebpf:
    print(bpf_text)
    exit()
379
380# L4 protocol resolver
class L4Proto:
    """Map L4 protocol numbers to the names exported by the socket module."""

    def __init__(self):
        # Every public socket attribute named IPPROTO_<NAME> contributes an
        # entry <numeric value> -> "<NAME>"; when several names share a
        # value, the one listed last in socket.__all__ wins.
        ipproto_name = re.compile("IPPROTO_(.*)")
        candidates = (
            (name, ipproto_name.match(name)) for name in socket_all
        )
        self.num2str = {
            socket_dct[name]: hit.group(1) for name, hit in candidates if hit
        }

    def proto2str(self, proto):
        """Return the protocol name for *proto*, or "UNKNOWN" if unmapped."""
        try:
            return self.num2str[proto]
        except KeyError:
            return "UNKNOWN"


l4 = L4Proto()
394
395# bind options:
396# SOL_IP     IP_FREEBIND              F....
397# SOL_IP     IP_TRANSPARENT           .T...
398# SOL_IP     IP_BIND_ADDRESS_NO_PORT  ..N..
399# SOL_SOCKET SO_REUSEADDR             ...R.
400# SOL_SOCKET SO_REUSEPORT             ....r
def opts2str(bitfield):
    """Render the five low bits of *bitfield* as the "FTNRr" flag string.

    Bit 0 maps to "F", bit 1 to "T", and so on; a cleared bit is shown as
    ".". Higher bits are ignored. The result is returned encoded as bytes.
    """
    flags = [
        letter if bitfield & (1 << position) else "."
        for position, letter in enumerate("FTNRr")
    ]
    return "".join(flags).encode()
408
409
410# process events
def print_ipv4_bind_event(cpu, data, size):
    """Perf-buffer callback: print one output line for an IPv4 bind event.

    Optional leading columns (relative timestamp, UID, return code) are
    emitted first, depending on the command-line flags, followed by the
    fixed PID/COMM/PROT/ADDR/PORT/OPTS/IF columns.
    """
    global start_ts
    bind = b["ipv4_bind_events"].event(data)

    if args.timestamp:
        # The first event seen becomes time zero for the TIME(s) column.
        if start_ts == 0:
            start_ts = bind.ts_us
        elapsed_s = (float(bind.ts_us) - start_ts) / 1000000
        printb(b"%-9.6f " % elapsed_s, nl="")

    if args.print_uid:
        printb(b"%6d " % bind.uid, nl="")

    if args.errors:
        rc_text = error_value_fmt(bind.return_code).encode()
        printb(error_output_fmt % rc_text, nl="")

    columns = (
        bind.pid,
        bind.task,
        l4.proto2str(bind.protocol).encode(),
        # saddr is a 32-bit integer; repack it into the 4 raw bytes that
        # inet_ntop() expects.
        inet_ntop(AF_INET, pack("I", bind.saddr)).encode(),
        bind.sport,
        opts2str(bind.socket_options),
        bind.bound_dev_if,
    )
    printb(output_fmt % columns)
429
430
def print_ipv6_bind_event(cpu, data, size):
    """Perf-buffer callback: print one output line for an IPv6 bind event.

    Optional leading columns (relative timestamp, UID, return code) are
    emitted first, depending on the command-line flags, followed by the
    fixed PID/COMM/PROT/ADDR/PORT/OPTS/IF columns.
    """
    global start_ts
    bind = b["ipv6_bind_events"].event(data)

    if args.timestamp:
        # The first event seen becomes time zero for the TIME(s) column.
        if start_ts == 0:
            start_ts = bind.ts_us
        elapsed_s = (float(bind.ts_us) - start_ts) / 1000000
        printb(b"%-9.6f " % elapsed_s, nl="")

    if args.print_uid:
        printb(b"%6d " % bind.uid, nl="")

    if args.errors:
        rc_text = error_value_fmt(bind.return_code).encode()
        printb(error_output_fmt % rc_text, nl="")

    columns = (
        bind.pid,
        bind.task,
        l4.proto2str(bind.protocol).encode(),
        # Unlike the IPv4 path, saddr is handed to inet_ntop() unmodified.
        inet_ntop(AF_INET6, bind.saddr).encode(),
        bind.sport,
        opts2str(bind.socket_options),
        bind.bound_dev_if,
    )
    printb(output_fmt % columns)
449
450
def depict_cnt(counts_tab, l3prot='ipv4'):
    """Print the bind counters in *counts_tab*, highest count first.

    Each key must expose ``saddr`` and ``sport`` and each value ``value``.
    With ``l3prot == 'ipv4'`` the address is a 32-bit integer that is packed
    before formatting; any other *l3prot* treats ``saddr`` as raw IPv6
    address bytes.
    """
    by_hits = sorted(
        counts_tab.items(), key=lambda entry: entry[1].value, reverse=True
    )
    is_v4 = l3prot == 'ipv4'
    for flow, hits in by_hits:
        if is_v4:
            address = inet_ntop(AF_INET, pack('I', flow.saddr))
        else:
            address = inet_ntop(AF_INET6, flow.saddr)
        flow_column = "%-32s %20s" % (address, flow.sport)
        print("%s     %-10d" % (flow_column, hits.value))
465
466
467# initialize BPF
468b = BPF(text=bpf_text)
469b.attach_kprobe(event="inet_bind", fn_name="bindsnoop_entry")
470b.attach_kprobe(event="inet6_bind", fn_name="bindsnoop_entry")
471b.attach_kretprobe(event="inet_bind", fn_name="bindsnoop_v4_return")
472b.attach_kretprobe(event="inet6_bind", fn_name="bindsnoop_v6_return")
473
474print("Tracing binds ... Hit Ctrl-C to end")
475if args.count:
476    try:
477        while 1:
478            sleep(99999999)
479    except KeyboardInterrupt:
480        pass
481
482    # header
483    print("\n%-32s %20s     %-10s" % (
484        "LADDR", "LPORT", "BINDS"))
485    depict_cnt(b["ipv4_count"])
486    depict_cnt(b["ipv6_count"], l3prot='ipv6')
487# read events
488else:
489    # header
490    if args.timestamp:
491        print("%-9s " % ("TIME(s)"), end="")
492    if args.print_uid:
493        print("%6s " % ("UID"), end="")
494    if args.errors:
495        print(error_header_fmt % ("RC"), end="")
496    print(header_fmt % ("PID", "COMM", "PROT", "ADDR", "PORT", "OPTS", "IF"))
497
498    start_ts = 0
499
500    # read events
501    b["ipv4_bind_events"].open_perf_buffer(print_ipv4_bind_event)
502    b["ipv6_bind_events"].open_perf_buffer(print_ipv6_bind_event)
503    while 1:
504        try:
505            b.perf_buffer_poll()
506        except KeyboardInterrupt:
507            exit()
508