xref: /aosp_15_r20/external/bcc/tools/tcplife.lua (revision 387f9dfdfa2baef462e92476d413c7bc2470293e)
1#!/usr/bin/env bcc-lua
2--[[
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14
1518-Mar-2017  Simon Liu Created this.
16--]]
17
18local ffi = require("ffi")
19local bit = require("bit")
20
21ffi.cdef[[
22const char *inet_ntop(int af, const void *src, char *dst, int size);
23uint16_t ntohs(uint16_t netshort);
24]]
25
-- BPF C source for the tcp_set_state kprobe.
-- The FILTER_LPORT, FILTER_DPORT and FILTER_PID tokens are placeholders:
-- parse_arg() replaces each of them with a generated C `if (...)` statement,
-- or with the empty string when the matching option is not given.
local program = [[
#include <uapi/linux/ptrace.h>
#include <linux/tcp.h>
#include <net/sock.h>
#include <bcc/proto.h>

BPF_HASH(birth, struct sock *, u64);

// separate data structs for ipv4 and ipv6
struct ipv4_data_t {
    // XXX: switch some to u32's when supported
    u64 ts_us;
    u64 pid;
    u64 saddr;
    u64 daddr;
    u64 ports;
    u64 rx_b;
    u64 tx_b;
    u64 span_us;
    char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(ipv4_events);

struct ipv6_data_t {
    u64 ts_us;
    u64 pid;
    u64 saddr[2];
    u64 daddr[2];
    u64 ports;
    u64 rx_b;
    u64 tx_b;
    u64 span_us;
    char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(ipv6_events);

struct id_t {
    u32 pid;
    char task[TASK_COMM_LEN];
};
BPF_HASH(whoami, struct sock *, struct id_t);

int trace_tcp_set_state(struct pt_regs *ctx, struct sock *sk, int state)
{
    u32 pid = bpf_get_current_pid_tgid() >> 32;

    // lport is either used in a filter here, or later
    u16 lport = sk->__sk_common.skc_num;
    FILTER_LPORT

    // dport is either used in a filter here, or later
    u16 dport = sk->__sk_common.skc_dport;
    FILTER_DPORT

    /*
     * This tool includes PID and comm context. It's best effort, and may
     * be wrong in some situations. It currently works like this:
     * - record timestamp on any state < TCP_FIN_WAIT1
     * - cache task context on:
     *       TCP_SYN_SENT: tracing from client
     *       TCP_LAST_ACK: client-closed from server
     * - do output on TCP_CLOSE:
     *       fetch task context if cached, or use current task
     */

    // capture birth time
    if (state < TCP_FIN_WAIT1) {
        /*
         * Matching just ESTABLISHED may be sufficient, provided no code-path
         * sets ESTABLISHED without a tcp_set_state() call. Until we know
         * that for sure, match all early states to increase chances a
         * timestamp is set.
         * Note that this needs to be set before the PID filter later on,
         * since the PID isn't reliable for these early stages, so we must
         * save all timestamps and do the PID filter later when we can.
         */
        u64 ts = bpf_ktime_get_ns();
        birth.update(&sk, &ts);
    }

    // record PID & comm on SYN_SENT or LAST_ACK
    if (state == TCP_SYN_SENT || state == TCP_LAST_ACK) {
        // now we can PID filter, both here and a little later on for CLOSE
        FILTER_PID
        struct id_t me = {.pid = pid};
        bpf_get_current_comm(&me.task, sizeof(me.task));
        whoami.update(&sk, &me);
    }

    if (state != TCP_CLOSE)
        return 0;

    // calculate lifespan
    u64 *tsp, delta_us;
    tsp = birth.lookup(&sk);
    if (tsp == 0) {
        whoami.delete(&sk);     // may not exist
        return 0;               // missed create
    }
    delta_us = (bpf_ktime_get_ns() - *tsp) / 1000;
    birth.delete(&sk);

    // fetch possible cached data, and filter
    struct id_t *mep;
    mep = whoami.lookup(&sk);
    if (mep != 0)
        pid = mep->pid;
    FILTER_PID

    // get throughput stats. see tcp_get_info().
    u64 rx_b = 0, tx_b = 0;
    struct tcp_sock *tp = (struct tcp_sock *)sk;
    rx_b = tp->bytes_received;
    tx_b = tp->bytes_acked;

    u16 family = sk->__sk_common.skc_family;

    if (family == AF_INET) {
        struct ipv4_data_t data4 = {.span_us = delta_us,
            .rx_b = rx_b, .tx_b = tx_b};
        data4.ts_us = bpf_ktime_get_ns() / 1000;
        data4.saddr = sk->__sk_common.skc_rcv_saddr;
        data4.daddr = sk->__sk_common.skc_daddr;
        // a workaround until data4 compiles with separate lport/dport
        data4.pid = pid;
        data4.ports = ntohs(dport) + ((0ULL + lport) << 32);
        if (mep == 0) {
            bpf_get_current_comm(&data4.task, sizeof(data4.task));
        } else {
            bpf_probe_read_kernel(&data4.task, sizeof(data4.task), (void *)mep->task);
        }
        ipv4_events.perf_submit(ctx, &data4, sizeof(data4));

    } else /* 6 */ {
        struct ipv6_data_t data6 = {.span_us = delta_us,
            .rx_b = rx_b, .tx_b = tx_b};
        data6.ts_us = bpf_ktime_get_ns() / 1000;
        bpf_probe_read_kernel(&data6.saddr, sizeof(data6.saddr),
            sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
        bpf_probe_read_kernel(&data6.daddr, sizeof(data6.daddr),
            sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
        // a workaround until data6 compiles with separate lport/dport
        data6.ports = ntohs(dport) + ((0ULL + lport) << 32);
        data6.pid = pid;
        if (mep == 0) {
            bpf_get_current_comm(&data6.task, sizeof(data6.task));
        } else {
            bpf_probe_read_kernel(&data6.task, sizeof(data6.task), (void *)mep->task);
        }
        ipv6_events.perf_submit(ctx, &data6, sizeof(data6));
    }

    if (mep != 0)
        whoami.delete(&sk);

    return 0;
}
]]
185
-- When true, the entry function prints the final BPF program text before
-- loading it.
local debug = false

-- Timestamp (microseconds) of the first event printed with -t; 0 until then.
local start_ts = 0

-- Address family numbers handed to inet_ntop().
local AF_INET, AF_INET6 = 2, 10

-- Longest textual form of each address family (terminating NUL not counted).
local inet_addresslen = string.len("255.255.255.255")
local inet6_addresslen = string.len("ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255")

-- Column layouts for the header row and for each event row. parse_arg()
-- swaps these for the wide (-w) or CSV (-s) variants.
local header_string = "%-5s %-10.10s %s%-15s %-5s %-15s %-5s %5s %5s %s"
local format_string = "%-5d %-10.10s %s%-15s %-5d %-15s %-5d %5d %5d %.2f"

-- Title of the IP-version column, and whether that column is filled in.
local ip_string = ""
local ip_version = false

-- Output switches, set by parse_arg() from the command line.
local arg_timestamp = false
local arg_csv = false
local arg_time = false

-- Usage examples handed to the argument parser.
local examples = [[examples:
    ./tcplife           # trace all TCP connect()s
    ./tcplife -t        # include time column (HH:MM:SS)
    ./tcplife -w        # wider columns (fit IPv6)
    ./tcplife -stT      # csv output, with times & timestamps
    ./tcplife -p 181    # only trace PID 181
    ./tcplife -L 80     # only trace local port 80
    ./tcplife -L 80,81  # only trace local ports 80 and 81
    ./tcplife -D 80     # only trace remote port 80
]]
212
--- Split a string into its non-empty fields.
-- @param str  string to split
-- @param sep  separator character(s); every character in `sep` acts as a
--             delimiter. Defaults to "," when nil or empty.
-- @return     array of the fields, in order (empty fields are dropped)
local function split(str, sep)
   if sep == nil or sep == "" then
      sep = ","
   end
   -- Escape the characters that are special inside a pattern character
   -- class so that any separator can be used literally.
   local class = sep:gsub("[%^%]%-%%]", "%%%0")
   local t = {}
   for w in string.gmatch(str, "([^" .. class .. "]+)") do
      t[#t + 1] = w
   end
   return t
end
220
--- Convert a binary network address into its text form via inet_ntop(3).
-- @param af    address family: AF_INET or AF_INET6
-- @param addr  initializer for the source buffer: one value for AF_INET
--              (copied into a uint64_t[1]), or a two-element table for
--              AF_INET6 (copied into a uint64_t[2])
-- @param len   maximum length of the text form, not counting the NUL
-- @return      the address as a Lua string, or "" if the conversion failed
local function inet_ntop(af, addr, len)
   -- inet_ntop() writes a terminating NUL, so the buffer must be one byte
   -- larger than the longest text form or maximum-length addresses fail.
   local size = len + 1
   local addr_dst = ffi.new("char[?]", size)
   local addr_src
   if af == AF_INET then
      addr_src = ffi.new("uint64_t[1]", addr)
   else
      addr_src = ffi.new("uint64_t[2]", addr)
   end
   -- A NULL result (compares equal to nil) means the conversion failed.
   if ffi.C.inet_ntop(af, addr_src, addr_dst, size) == nil then
      return ""
   end
   -- Stop at the NUL terminator so no padding bytes end up in the output.
   return ffi.string(addr_dst)
end
232
--- Pass a 16-bit port number through the C library's ntohs().
-- @param port  port as a number or numeric string
-- @return      the value returned by ntohs()
-- Raises an error if `port` is not an integer in the range 0..65535.
local function inet_ntohs(port)
   local p = tonumber(port)
   -- Reject bad input here; otherwise nil reaches the FFI call and
   -- out-of-range values are silently truncated to 16 bits.
   if p == nil or p < 0 or p > 65535 or p % 1 ~= 0 then
      error("invalid port number: " .. tostring(port), 2)
   end
   return ffi.C.ntohs(p)
end
237
-- 2^32: the BPF program stores the local port in the upper 32 bits of
-- `ports` and the remote port in the lower 32 bits.
local PORTS_HIGH_UNIT = 4294967296

--- Print one output row for a closed TCP session.
-- Shared by the IPv4 and IPv6 callbacks, which differ only in how the
-- addresses are converted.
-- @param event    perf event record (fields as declared for the perf buffer)
-- @param version  "4" or "6", shown in the IP column when it is enabled
-- @param saddr    local address, already converted to text
-- @param daddr    remote address, already converted to text
local function print_event_row(event, version, saddr, daddr)
   local event_pid = tonumber(event.pid)
   local event_task = ffi.string(event.task)
   local event_ports = tonumber(event.ports)
   local event_ts_us = tonumber(event.ts_us)
   -- Whole kilobytes; floored explicitly because they are printed with %d.
   local tx_kb = math.floor(tonumber(event.tx_b) / 1024)
   local rx_kb = math.floor(tonumber(event.rx_b) / 1024)
   local span_ms = tonumber(event.span_us) / 1000

   if arg_time then
      local now = os.date("%H:%M:%S")
      if arg_csv then
         io.write(string.format("%s,", now))
      else
         io.write(string.format("%-8s ", now))
      end
   end

   if arg_timestamp then
      -- The first event seen becomes time zero.
      if start_ts == 0 then
         start_ts = event_ts_us
      end
      local delta_s = (event_ts_us - start_ts) / 1000000
      if arg_csv then
         io.write(string.format("%.6f,", delta_s))
      else
         io.write(string.format("%-9.6f ", delta_s))
      end
   end

   local iv = ""
   if ip_version then
      iv = version
   end

   -- Plain arithmetic instead of the bit library: on Lua numbers the bit
   -- functions work on 32 bits and mask the shift count to 0..31, so a
   -- right shift by 32 would return the low word (the remote port).
   local lport = math.floor(event_ports / PORTS_HIGH_UNIT)
   local dport = event_ports % PORTS_HIGH_UNIT

   print(string.format(format_string, event_pid, event_task, iv,
                       saddr, lport, daddr, dport,
                       tx_kb, rx_kb, span_ms))
end

--- Perf-buffer callback for IPv4 session events.
-- @param cpu    CPU the event came from (unused)
-- @param event  IPv4 event record
local function print_ipv4_event(cpu, event)
   local saddr = inet_ntop(AF_INET, tonumber(event.saddr), inet_addresslen)
   local daddr = inet_ntop(AF_INET, tonumber(event.daddr), inet_addresslen)
   print_event_row(event, "4", saddr, daddr)
end


--- Perf-buffer callback for IPv6 session events.
-- @param cpu    CPU the event came from (unused)
-- @param event  IPv6 event record
local function print_ipv6_event(cpu, event)
   -- The 64-bit halves are passed on as they are: converting them with
   -- tonumber() would go through a double and lose bits above 2^53.
   local saddr = inet_ntop(AF_INET6, {event.saddr[0], event.saddr[1]}, inet6_addresslen)
   local daddr = inet_ntop(AF_INET6, {event.daddr[0], event.daddr[1]}, inet6_addresslen)
   print_event_row(event, "6", saddr, daddr)
end
315
--- Parse the command line and configure the tool from it.
-- Replaces the FILTER_PID / FILTER_DPORT / FILTER_LPORT placeholders in
-- `program`, selects the header/row formats, sets the arg_* switches and
-- writes the leading TIME / TIME(s) header cells.
-- @param utils  helper table; only `utils.argparse` is used here
local function parse_arg(utils)
   local parser = utils.argparse("tcplife",
                                 "Trace the lifespan of TCP sessions and summarize", examples)

   parser:flag("-T --time", "include time column on output (HH:MM:SS)")
   parser:flag("-t --timestamp", "include timestamp on output (seconds)")
   parser:flag("-w --wide", "wide column output (fits IPv6 addresses)")
   parser:flag("-s --csv", "comma separated values output")
   parser:option("-p --pid", "trace this PID only"):convert(tonumber)
   parser:option("-L --localport", "comma-separated list of local ports to trace.")
   parser:option("-D --remoteport", "comma-separated list of remote ports to trace.")

   local args = parser:parse()

   -- Check a user-supplied port and return it as a number, unchanged.
   local function host_port(p)
      local n = tonumber(p)
      if n == nil or n < 0 or n > 65535 or n % 1 ~= 0 then
         error("invalid port number: " .. tostring(p))
      end
      return n
   end

   -- Build the C statement that skips sockets whose `var` matches none of
   -- the listed ports; returns nil when the list holds no ports at all.
   local function port_filter(var, list, convert)
      local conds = {}
      for _, p in ipairs(split(list, ",")) do
         conds[#conds + 1] = string.format('%s != %d', var, convert(p))
      end
      if #conds == 0 then
         return nil
      end
      return string.format("if (%s) { birth.delete(&sk); return 0; }",
                           table.concat(conds, ' && '))
   end

   if args.pid then
      local filter = string.format('if (pid != %d) { return 0; }', args.pid)
      -- Method call: the subject string must be gsub's first argument.
      program = program:gsub('FILTER_PID', filter)
   end

   if args.remoteport then
      -- dport is compared in network byte order in the BPF code (it is
      -- only converted with ntohs() for output), so swap the user's value.
      local filter = port_filter('dport', args.remoteport, inet_ntohs)
      if filter then
         program = program:gsub('FILTER_DPORT', filter)
      end
   end
   if args.localport then
      -- lport comes from skc_num and is used without any byte swap in the
      -- BPF code, so the user's value must be compared as given.
      local filter = port_filter('lport', args.localport, host_port)
      if filter then
         program = program:gsub('FILTER_LPORT', filter)
      end
   end
   -- Remove whichever placeholders were not replaced above.
   program = program:gsub('FILTER_PID', '')
   program = program:gsub('FILTER_DPORT', '')
   program = program:gsub('FILTER_LPORT', '')

   if args.wide then
      header_string = "%-5s %-16.16s %-2s %-39s %-5s %-39s %-5s %6s %6s %s"
      format_string = "%-5d %-16.16s %-2s %-39s %-5s %-39s %-5d %6d %6d %.2f"
      ip_string = "IP"
      ip_version = true
   end
   if args.csv then
      header_string = "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s"
      format_string = "%d,%s,%s,%s,%s,%s,%d,%d,%d,%.2f"
      ip_string = "IP"
      ip_version = true
      arg_csv = true
   end

   -- The optional time columns come first, so their header cells are
   -- written here, ahead of the main header line printed by the caller.
   if args.time then
      arg_time = true
      if args.csv then
         io.write(string.format("%s,", "TIME"))
      else
         io.write(string.format("%-8s ", "TIME"))
      end
   end

   if args.timestamp then
      arg_timestamp = true
      if args.csv then
         io.write(string.format("%s,", "TIME(s)"))
      else
         io.write(string.format("%-9s ", "TIME(s)"))
      end
   end

end
397
--- Tool entry point: parse options, load the BPF program, attach the
-- kprobe, print the header line and poll the two perf buffers.
-- @param BPF    BPF class; used here via BPF:new{text=...}
-- @param utils  helper table forwarded to parse_arg()
return function(BPF, utils)
   parse_arg(utils)

   if debug then
      print(program)
   end

   local TASK_COMM_LEN = 16 -- linux/sched.h

   -- Record layouts given to open_perf_buffer(); the field order follows
   -- struct ipv4_data_t / struct ipv6_data_t in the BPF program, and `$`
   -- is filled in from the parameter list ({TASK_COMM_LEN}).
   local ipv4_event_decl = [[
    struct {
      uint64_t ts_us;
      uint64_t pid;
      uint64_t saddr;
      uint64_t daddr;
      uint64_t ports;
      uint64_t rx_b;
      uint64_t tx_b;
      uint64_t span_us;
      char task[$];
    }
   ]]
   local ipv6_event_decl = [[
    struct {
      uint64_t ts_us;
      uint64_t pid;
      uint64_t saddr[2];
      uint64_t daddr[2];
      uint64_t ports;
      uint64_t rx_b;
      uint64_t tx_b;
      uint64_t span_us;
      char task[$];
    }
   ]]

   local bpf = BPF:new{text=program}
   bpf:attach_kprobe{event="tcp_set_state", fn_name="trace_tcp_set_state"}

   local columns = {"PID", "COMM",
                    ip_string, "LADDR",
                    "LPORT", "RADDR", "RPORT", "TX_KB", "RX_KB", "MS"}
   print(header_string % columns)

   -- NOTE(review): the trailing 64 is passed through unchanged; presumably
   -- the perf buffer page count -- confirm against open_perf_buffer().
   local ipv4_events = bpf:get_table("ipv4_events")
   ipv4_events:open_perf_buffer(print_ipv4_event, ipv4_event_decl, {TASK_COMM_LEN}, 64)

   local ipv6_events = bpf:get_table("ipv6_events")
   ipv6_events:open_perf_buffer(print_ipv6_event, ipv6_event_decl, {TASK_COMM_LEN}, 64)

   bpf:perf_buffer_poll_loop()
end
439