xref: /aosp_15_r20/external/bcc/tools/tcpconnlat.py (revision 387f9dfdfa2baef462e92476d413c7bc2470293e)
1#!/usr/bin/env python
2# @lint-avoid-python-3-compatibility-imports
3#
4# tcpconnlat    Trace TCP active connection latency (connect).
5#               For Linux, uses BCC, eBPF. Embedded C.
6#
7# USAGE: tcpconnlat [-h] [-t] [-p PID] [-4 | -6]
8#
9# This uses dynamic tracing of kernel functions, and will need to be updated
10# to match kernel changes.
11#
12# Copyright 2016 Netflix, Inc.
13# Licensed under the Apache License, Version 2.0 (the "License")
14#
15# 19-Feb-2016   Brendan Gregg   Created this.
16# 15-Mar-2021   Suresh Kumar    Added LPORT option
17
18from __future__ import print_function
19from bcc import BPF
20from socket import inet_ntop, AF_INET, AF_INET6
21from struct import pack
22import argparse
23
24# arg validation
25def positive_float(val):
26    try:
27        ival = float(val)
28    except ValueError:
29        raise argparse.ArgumentTypeError("must be a float")
30
31    if ival < 0:
32        raise argparse.ArgumentTypeError("must be positive")
33    return ival
34
35# arguments
36examples = """examples:
37    ./tcpconnlat           # trace all TCP connect()s
38    ./tcpconnlat 1         # trace connection latency slower than 1 ms
39    ./tcpconnlat 0.1       # trace connection latency slower than 100 us
40    ./tcpconnlat -t        # include timestamps
41    ./tcpconnlat -p 181    # only trace PID 181
42    ./tcpconnlat -L        # include LPORT while printing outputs
43    ./tcpconnlat -4        # trace IPv4 family only
44    ./tcpconnlat -6        # trace IPv6 family only
45"""
46parser = argparse.ArgumentParser(
47    description="Trace TCP connects and show connection latency",
48    formatter_class=argparse.RawDescriptionHelpFormatter,
49    epilog=examples)
50parser.add_argument("-t", "--timestamp", action="store_true",
51    help="include timestamp on output")
52parser.add_argument("-p", "--pid",
53    help="trace this PID only")
54parser.add_argument("-L", "--lport", action="store_true",
55    help="include LPORT on output")
56group = parser.add_mutually_exclusive_group()
57group.add_argument("-4", "--ipv4", action="store_true",
58    help="trace IPv4 family only")
59group.add_argument("-6", "--ipv6", action="store_true",
60    help="trace IPv6 family only")
61parser.add_argument("duration_ms", nargs="?", default=0,
62    type=positive_float,
63    help="minimum duration to trace (ms)")
64parser.add_argument("-v", "--verbose", action="store_true",
65    help="print the BPF program for debugging purposes")
66parser.add_argument("--ebpf", action="store_true",
67    help=argparse.SUPPRESS)
68args = parser.parse_args()
69
70if args.duration_ms:
71    # support fractions but round to nearest microsecond
72    duration_us = int(args.duration_ms * 1000)
73else:
74    duration_us = 0   # default is show all
75
76debug = 0
77
78# define BPF program
79bpf_text = """
80#include <uapi/linux/ptrace.h>
81#include <net/sock.h>
82#include <net/tcp_states.h>
83#include <bcc/proto.h>
84
85struct info_t {
86    u64 ts;
87    u32 pid;
88    char task[TASK_COMM_LEN];
89};
90BPF_HASH(start, struct sock *, struct info_t);
91
92// separate data structs for ipv4 and ipv6
93struct ipv4_data_t {
94    u64 ts_us;
95    u32 pid;
96    u32 saddr;
97    u32 daddr;
98    u64 ip;
99    u16 lport;
100    u16 dport;
101    u64 delta_us;
102    char task[TASK_COMM_LEN];
103};
104BPF_PERF_OUTPUT(ipv4_events);
105
106struct ipv6_data_t {
107    u64 ts_us;
108    u32 pid;
109    unsigned __int128 saddr;
110    unsigned __int128 daddr;
111    u64 ip;
112    u16 lport;
113    u16 dport;
114    u64 delta_us;
115    char task[TASK_COMM_LEN];
116};
117BPF_PERF_OUTPUT(ipv6_events);
118
119int trace_connect(struct pt_regs *ctx, struct sock *sk)
120{
121    u32 pid = bpf_get_current_pid_tgid() >> 32;
122    FILTER
123    struct info_t info = {.pid = pid};
124    info.ts = bpf_ktime_get_ns();
125    bpf_get_current_comm(&info.task, sizeof(info.task));
126    start.update(&sk, &info);
127    return 0;
128};
129
130// See tcp_v4_do_rcv() and tcp_v6_do_rcv(). So TCP_ESTBALISHED and TCP_LISTEN
131// are fast path and processed elsewhere, and leftovers are processed by
132// tcp_rcv_state_process(). We can trace this for handshake completion.
133// This should all be switched to static tracepoints when available.
134int trace_tcp_rcv_state_process(struct pt_regs *ctx, struct sock *skp)
135{
136    // will be in TCP_SYN_SENT for handshake
137    if (skp->__sk_common.skc_state != TCP_SYN_SENT)
138        return 0;
139
140    // check start and calculate delta
141    struct info_t *infop = start.lookup(&skp);
142    if (infop == 0) {
143        return 0;   // missed entry or filtered
144    }
145
146    u64 ts = infop->ts;
147    u64 now = bpf_ktime_get_ns();
148
149    u64 delta_us = (now - ts) / 1000ul;
150
151#ifdef MIN_LATENCY
152    if ( delta_us < DURATION_US ) {
153        return 0; // connect latency is below latency filter minimum
154    }
155#endif
156
157    // pull in details
158    u16 family = 0, lport = 0, dport = 0;
159    family = skp->__sk_common.skc_family;
160    lport = skp->__sk_common.skc_num;
161    dport = skp->__sk_common.skc_dport;
162
163    // emit to appropriate data path
164    if (family == AF_INET) {
165        struct ipv4_data_t data4 = {.pid = infop->pid, .ip = 4};
166        data4.ts_us = now / 1000;
167        data4.saddr = skp->__sk_common.skc_rcv_saddr;
168        data4.daddr = skp->__sk_common.skc_daddr;
169        data4.lport = lport;
170        data4.dport = ntohs(dport);
171        data4.delta_us = delta_us;
172        __builtin_memcpy(&data4.task, infop->task, sizeof(data4.task));
173        ipv4_events.perf_submit(ctx, &data4, sizeof(data4));
174
175    } else /* AF_INET6 */ {
176        struct ipv6_data_t data6 = {.pid = infop->pid, .ip = 6};
177        data6.ts_us = now / 1000;
178        bpf_probe_read_kernel(&data6.saddr, sizeof(data6.saddr),
179            skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
180        bpf_probe_read_kernel(&data6.daddr, sizeof(data6.daddr),
181            skp->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
182        data6.lport = lport;
183        data6.dport = ntohs(dport);
184        data6.delta_us = delta_us;
185        __builtin_memcpy(&data6.task, infop->task, sizeof(data6.task));
186        ipv6_events.perf_submit(ctx, &data6, sizeof(data6));
187    }
188
189    start.delete(&skp);
190
191    return 0;
192}
193"""
194
195if duration_us > 0:
196    bpf_text = "#define MIN_LATENCY\n" + bpf_text
197    bpf_text = bpf_text.replace('DURATION_US', str(duration_us))
198
199# code substitutions
200if args.pid:
201    bpf_text = bpf_text.replace('FILTER',
202        'if (pid != %s) { return 0; }' % args.pid)
203else:
204    bpf_text = bpf_text.replace('FILTER', '')
205if debug or args.verbose or args.ebpf:
206    print(bpf_text)
207    if args.ebpf:
208        exit()
209
210# initialize BPF
211b = BPF(text=bpf_text)
212if args.ipv4:
213    b.attach_kprobe(event="tcp_v4_connect", fn_name="trace_connect")
214elif args.ipv6:
215    b.attach_kprobe(event="tcp_v6_connect", fn_name="trace_connect")
216else:
217    b.attach_kprobe(event="tcp_v4_connect", fn_name="trace_connect")
218    b.attach_kprobe(event="tcp_v6_connect", fn_name="trace_connect")
219
220b.attach_kprobe(event="tcp_rcv_state_process",
221    fn_name="trace_tcp_rcv_state_process")
222
223# process event
224start_ts = 0
225
226def print_ipv4_event(cpu, data, size):
227    event = b["ipv4_events"].event(data)
228    global start_ts
229    if args.timestamp:
230        if start_ts == 0:
231            start_ts = event.ts_us
232        print("%-9.3f" % ((float(event.ts_us) - start_ts) / 1000000), end="")
233    if args.lport:
234        print("%-7d %-12.12s %-2d %-16s %-6d %-16s %-5d %.2f" % (event.pid,
235            event.task.decode('utf-8', 'replace'), event.ip,
236            inet_ntop(AF_INET, pack("I", event.saddr)), event.lport,
237            inet_ntop(AF_INET, pack("I", event.daddr)), event.dport,
238            float(event.delta_us) / 1000))
239    else:
240        print("%-7d %-12.12s %-2d %-16s %-16s %-5d %.2f" % (event.pid,
241            event.task.decode('utf-8', 'replace'), event.ip,
242            inet_ntop(AF_INET, pack("I", event.saddr)),
243            inet_ntop(AF_INET, pack("I", event.daddr)), event.dport,
244            float(event.delta_us) / 1000))
245
246def print_ipv6_event(cpu, data, size):
247    event = b["ipv6_events"].event(data)
248    global start_ts
249    if args.timestamp:
250        if start_ts == 0:
251            start_ts = event.ts_us
252        print("%-9.3f" % ((float(event.ts_us) - start_ts) / 1000000), end="")
253    if args.lport:
254        print("%-7d %-12.12s %-2d %-16s %-6d %-16s %-5d %.2f" % (event.pid,
255            event.task.decode('utf-8', 'replace'), event.ip,
256            inet_ntop(AF_INET6, event.saddr), event.lport,
257            inet_ntop(AF_INET6, event.daddr),
258            event.dport, float(event.delta_us) / 1000))
259    else:
260        print("%-7d %-12.12s %-2d %-16s %-16s %-5d %.2f" % (event.pid,
261            event.task.decode('utf-8', 'replace'), event.ip,
262            inet_ntop(AF_INET6, event.saddr), inet_ntop(AF_INET6, event.daddr),
263            event.dport, float(event.delta_us) / 1000))
264
265# header
266if args.timestamp:
267    print("%-9s" % ("TIME(s)"), end="")
268if args.lport:
269    print("%-7s %-12s %-2s %-16s %-6s %-16s %-5s %s" % ("PID", "COMM",
270        "IP", "SADDR", "LPORT", "DADDR", "DPORT", "LAT(ms)"))
271else:
272    print("%-7s %-12s %-2s %-16s %-16s %-5s %s" % ("PID", "COMM", "IP",
273        "SADDR", "DADDR", "DPORT", "LAT(ms)"))
274
275# read events
276if args.ipv4:
277    b["ipv4_events"].open_perf_buffer(print_ipv4_event)
278elif args.ipv6:
279    b["ipv6_events"].open_perf_buffer(print_ipv6_event)
280else:
281    b["ipv4_events"].open_perf_buffer(print_ipv4_event)
282    b["ipv6_events"].open_perf_buffer(print_ipv6_event)
283
284while 1:
285    try:
286        b.perf_buffer_poll()
287    except KeyboardInterrupt:
288        exit()
289