xref: /aosp_15_r20/external/bcc/tools/opensnoop.py (revision 387f9dfdfa2baef462e92476d413c7bc2470293e)
1#!/usr/bin/env python
2# @lint-avoid-python-3-compatibility-imports
3#
4# opensnoop Trace open() syscalls.
5#           For Linux, uses BCC, eBPF. Embedded C.
6#
7# USAGE: opensnoop [-h] [-T] [-U] [-x] [-p PID] [-t TID]
8#                  [--cgroupmap CGROUPMAP] [--mntnsmap MNTNSMAP] [-u UID]
9#                  [-d DURATION] [-n NAME] [-F] [-e] [-f FLAG_FILTER]
10#                  [-b BUFFER_PAGES]
11#
12# Copyright (c) 2015 Brendan Gregg.
13# Licensed under the Apache License, Version 2.0 (the "License")
14#
15# 17-Sep-2015   Brendan Gregg   Created this.
16# 29-Apr-2016   Allan McAleavy  Updated for BPF_PERF_OUTPUT.
17# 08-Oct-2016   Dina Goldshtein Support filtering by PID and TID.
18# 28-Dec-2018   Tim Douglas     Print flags argument, enable filtering
19# 06-Jan-2019   Takuma Kume     Support filtering by UID
20# 21-Aug-2022   Rocky Xing      Support showing full path for an open file.
21# 06-Sep-2022   Rocky Xing      Support setting size of the perf ring buffer.
22
23from __future__ import print_function
24from bcc import ArgString, BPF
25from bcc.containers import filter_by_containers
26from bcc.utils import printb
27import argparse
28from collections import defaultdict
29from datetime import datetime, timedelta
30import os
31
# arguments
# Epilog shown verbatim under --help (RawDescriptionHelpFormatter keeps the
# layout as written here).
examples = """examples:
    ./opensnoop                        # trace all open() syscalls
    ./opensnoop -T                     # include timestamps
    ./opensnoop -U                     # include UID
    ./opensnoop -x                     # only show failed opens
    ./opensnoop -p 181                 # only trace PID 181
    ./opensnoop -t 123                 # only trace TID 123
    ./opensnoop -u 1000                # only trace UID 1000
    ./opensnoop -d 10                  # trace for 10 seconds only
    ./opensnoop -n main                # only print process names containing "main"
    ./opensnoop -e                     # show extended fields
    ./opensnoop -f O_WRONLY -f O_RDWR  # only print calls for writing
    ./opensnoop -F                     # show full path for an open file with relative path
    ./opensnoop --cgroupmap mappath    # only trace cgroups in this BPF map
    ./opensnoop --mntnsmap mappath     # only trace mount namespaces in the map
"""

parser = argparse.ArgumentParser(
    description="Trace open() syscalls",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)

# One (option strings, add_argument keywords) pair per command-line option.
# The order below is the order in which the options appear in --help.
_option_specs = (
    (("-T", "--timestamp"),
     dict(action="store_true", help="include timestamp on output")),
    (("-U", "--print-uid"),
     dict(action="store_true", help="print UID column")),
    (("-x", "--failed"),
     dict(action="store_true", help="only show failed opens")),
    (("-p", "--pid"),
     dict(help="trace this PID only")),
    (("-t", "--tid"),
     dict(help="trace this TID only")),
    (("--cgroupmap",),
     dict(help="trace cgroups in this BPF map only")),
    (("--mntnsmap",),
     dict(help="trace mount namespaces in this BPF map only")),
    (("-u", "--uid"),
     dict(help="trace this UID only")),
    (("-d", "--duration"),
     dict(help="total duration of trace in seconds")),
    (("-n", "--name"),
     dict(type=ArgString,
          help="only print process names containing this name")),
    # --ebpf is hidden from --help; it dumps the generated program and exits.
    (("--ebpf",),
     dict(action="store_true", help=argparse.SUPPRESS)),
    (("-e", "--extended_fields"),
     dict(action="store_true", help="show extended fields")),
    (("-f", "--flag_filter"),
     dict(action="append",
          help="filter on flags argument (e.g., O_WRONLY)")),
    (("-F", "--full-path"),
     dict(action="store_true",
          help="show full path for an open file with relative path")),
    (("-b", "--buffer-pages"),
     dict(type=int, default=64,
          help="size of the perf ring buffer "
               "(must be a power of two number of pages and defaults to 64)")),
)
for _option_strings, _option_kwargs in _option_specs:
    parser.add_argument(*_option_strings, **_option_kwargs)

args = parser.parse_args()
# Set to a non-zero value to dump the generated BPF program before loading.
debug = 0

# -d arrives as a string holding a number of seconds; keep it as a timedelta.
if args.duration:
    args.duration = timedelta(seconds=int(args.duration))

# OR together the os.O_* constants named by every -f option. A name that
# does not start with "O_" or that the os module does not define is fatal.
flag_filter_mask = 0
for flag in args.flag_filter or []:
    flag_value = getattr(os, flag, None) if flag.startswith('O_') else None
    if flag_value is None:
        exit("Bad flag: %s" % flag)
    flag_filter_mask |= flag_value
97
# define BPF program
#
# Shared C prelude for both probe flavours. It is plain text at this point:
# further C fragments are appended to it and the upper-case markers used in
# those fragments (PID_TID_FILTER, UID_FILTER, FLAGS_FILTER, SUBMIT_DATA,
# FNNAME) are substituted further down, before the text is compiled.
#
#   * FULLPATH is only defined when -F is given ("#define FULLPATH" is
#     prepended below); it adds the event_type enum and data_t.type.
#   * val_t is the per-call state the kprobe flavour stores at syscall entry
#     and reads back on return.
#   * data_t is the record sent to user space through the "events" perf
#     output; ts is stored in microseconds (ns / 1000 in the C code).
#   * Lines tagged EXTENDED_STRUCT_MEMBER are removed from the text below
#     unless -e or -f was given.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <uapi/linux/limits.h>
#include <linux/sched.h>
#ifdef FULLPATH
#include <linux/fs_struct.h>
#include <linux/dcache.h>

#define MAX_ENTRIES 32

enum event_type {
    EVENT_ENTRY,
    EVENT_END,
};
#endif

struct val_t {
    u64 id;
    char comm[TASK_COMM_LEN];
    const char *fname;
    int flags; // EXTENDED_STRUCT_MEMBER
};

struct data_t {
    u64 id;
    u64 ts;
    u32 uid;
    int ret;
    char comm[TASK_COMM_LEN];
#ifdef FULLPATH
    enum event_type type;
#endif
    char name[NAME_MAX];
    int flags; // EXTENDED_STRUCT_MEMBER
};

BPF_PERF_OUTPUT(events);
"""
137
# kprobe/kretprobe flavour, used when BPF.support_kfunc() is false.
#
# trace_return is the common return handler: it looks up the val_t that the
# entry handler stored under the current pid_tgid in the infotmp hash, copies
# it (plus timestamp, uid and the syscall return value) into a data_t,
# submits it via SUBMIT_DATA and removes the hash entry. A return with no
# stored entry is ignored.
bpf_text_kprobe = """
BPF_HASH(infotmp, u64, struct val_t);

int trace_return(struct pt_regs *ctx)
{
    u64 id = bpf_get_current_pid_tgid();
    struct val_t *valp;
    struct data_t data = {};

    u64 tsp = bpf_ktime_get_ns();

    valp = infotmp.lookup(&id);
    if (valp == 0) {
        // missed entry
        return 0;
    }

    bpf_probe_read_kernel(&data.comm, sizeof(data.comm), valp->comm);
    bpf_probe_read_user_str(&data.name, sizeof(data.name), (void *)valp->fname);
    data.id = valp->id;
    data.ts = tsp / 1000;
    data.uid = bpf_get_current_uid_gid();
    data.flags = valp->flags; // EXTENDED_STRUCT_MEMBER
    data.ret = PT_REGS_RC(ctx);

    SUBMIT_DATA

    infotmp.delete(&id);

    return 0;
}
"""

# Entry-handler signatures, one per traced syscall. Each header opens a
# function whose body is completed by appending bpf_text_kprobe_body.
bpf_text_kprobe_header_open = """
int syscall__trace_entry_open(struct pt_regs *ctx, const char __user *filename, int flags)
{
"""

bpf_text_kprobe_header_openat = """
int syscall__trace_entry_openat(struct pt_regs *ctx, int dfd, const char __user *filename, int flags)
{
"""

# openat2 passes its flags inside struct open_how, so this header derives the
# local "flags" that the shared body expects.
# NOTE(review): "how" is dereferenced directly here; presumably this relies
# on BCC rewriting the access into a probe read -- confirm.
bpf_text_kprobe_header_openat2 = """
#include <uapi/linux/openat2.h>
int syscall__trace_entry_openat2(struct pt_regs *ctx, int dfd, const char __user *filename, struct open_how *how)
{
    int flags = how->flags;
"""

# Shared entry-handler body: applies the PID/TID, UID, flags and container
# filters, then records comm, filename pointer and flags in infotmp keyed by
# pid_tgid for trace_return to pick up.
bpf_text_kprobe_body = """
    struct val_t val = {};
    u64 id = bpf_get_current_pid_tgid();
    u32 pid = id >> 32; // PID is higher part
    u32 tid = id;       // Cast and get the lower part
    u32 uid = bpf_get_current_uid_gid();

    PID_TID_FILTER
    UID_FILTER
    FLAGS_FILTER

    if (container_should_be_filtered()) {
        return 0;
    }

    if (bpf_get_current_comm(&val.comm, sizeof(val.comm)) == 0) {
        val.id = id;
        val.fname = filename;
        val.flags = flags; // EXTENDED_STRUCT_MEMBER
        infotmp.update(&id, &val);
    }

    return 0;
};
"""
213
# kfunc (KRETFUNC_PROBE) flavour, used when BPF.support_kfunc() is true.
#
# Each header opens a probe on FNNAME (replaced below with the resolved
# syscall symbol) and is completed by appending bpf_text_kfunc_body. When
# CONFIG_ARCH_HAS_SYSCALL_WRAPPER is defined (and the target is not s390x)
# the syscall arguments are pulled out of a struct pt_regs pointer;
# otherwise they are taken directly as probe parameters.
bpf_text_kfunc_header_open = """
#if defined(CONFIG_ARCH_HAS_SYSCALL_WRAPPER) && !defined(__s390x__)
KRETFUNC_PROBE(FNNAME, struct pt_regs *regs, int ret)
{
    const char __user *filename = (char *)PT_REGS_PARM1(regs);
    int flags = PT_REGS_PARM2(regs);
#else
KRETFUNC_PROBE(FNNAME, const char __user *filename, int flags, int ret)
{
#endif
"""

bpf_text_kfunc_header_openat = """
#if defined(CONFIG_ARCH_HAS_SYSCALL_WRAPPER) && !defined(__s390x__)
KRETFUNC_PROBE(FNNAME, struct pt_regs *regs, int ret)
{
    int dfd = PT_REGS_PARM1(regs);
    const char __user *filename = (char *)PT_REGS_PARM2(regs);
    int flags = PT_REGS_PARM3(regs);
#else
KRETFUNC_PROBE(FNNAME, int dfd, const char __user *filename, int flags, int ret)
{
#endif
"""

# openat2 carries its flags inside struct open_how. In the syscall-wrapper
# branch the structure is first copied from user memory with
# bpf_probe_read_user and the flags are read from that copy.
# NOTE(review): "how" in that branch is a local copy, so the __user
# annotation on its declaration looks unnecessary -- confirm.
bpf_text_kfunc_header_openat2 = """
#include <uapi/linux/openat2.h>
#if defined(CONFIG_ARCH_HAS_SYSCALL_WRAPPER) && !defined(__s390x__)
KRETFUNC_PROBE(FNNAME, struct pt_regs *regs, int ret)
{
    int dfd = PT_REGS_PARM1(regs);
    const char __user *filename = (char *)PT_REGS_PARM2(regs);
    struct open_how __user how;
    int flags;

    bpf_probe_read_user(&how, sizeof(struct open_how), (struct open_how*)PT_REGS_PARM3(regs));
    flags = how.flags;
#else
KRETFUNC_PROBE(FNNAME, int dfd, const char __user *filename, struct open_how __user *how, int ret)
{
    int flags = how->flags;
#endif
"""

# Shared probe body. The handler has both the syscall arguments and the
# return value (ret) available, so after the PID/TID, UID, flags and
# container filters it fills a data_t directly and submits it; the infotmp
# hash of the kprobe flavour is not used here.
bpf_text_kfunc_body = """
    u64 id = bpf_get_current_pid_tgid();
    u32 pid = id >> 32; // PID is higher part
    u32 tid = id;       // Cast and get the lower part
    u32 uid = bpf_get_current_uid_gid();

    PID_TID_FILTER
    UID_FILTER
    FLAGS_FILTER
    if (container_should_be_filtered()) {
        return 0;
    }

    struct data_t data = {};
    bpf_get_current_comm(&data.comm, sizeof(data.comm));

    u64 tsp = bpf_ktime_get_ns();

    bpf_probe_read_user_str(&data.name, sizeof(data.name), (void *)filename);
    data.id    = id;
    data.ts    = tsp / 1000;
    data.uid   = bpf_get_current_uid_gid();
    data.flags = flags; // EXTENDED_STRUCT_MEMBER
    data.ret   = ret;

    SUBMIT_DATA

    return 0;
}
"""
288
# An empty BPF object is enough to query the kernel's syscall symbol names;
# `b` is rebound to the real program once the C text has been assembled.
b = BPF(text='')
syscall_prefix = b.get_syscall_prefix().decode()

# open and openat are always in place since 2.6.16
fnname_open = syscall_prefix + 'open'
fnname_openat = syscall_prefix + 'openat'

# openat2 is only traced when the kernel exposes its symbol; None disables it.
openat2_symbol = syscall_prefix + 'openat2'
fnname_openat2 = openat2_symbol if b.ksymname(openat2_symbol) != -1 else None
296
# -F switches on the FULLPATH sections of the C prelude.
if args.full_path:
    bpf_text = "#define FULLPATH\n" + bpf_text

# Append one handler per traced syscall: a syscall-specific header followed
# by the body shared by that probe flavour. openat2 is included only when
# its symbol was found.
is_support_kfunc = BPF.support_kfunc()
if is_support_kfunc:
    kfunc_probes = [
        (bpf_text_kfunc_header_open, fnname_open),
        (bpf_text_kfunc_header_openat, fnname_openat),
    ]
    if fnname_openat2:
        kfunc_probes.append((bpf_text_kfunc_header_openat2, fnname_openat2))

    for probe_header, probe_symbol in kfunc_probes:
        bpf_text += probe_header.replace('FNNAME', probe_symbol)
        bpf_text += bpf_text_kfunc_body
else:
    # The kprobe flavour also needs the common return handler, emitted first.
    bpf_text += bpf_text_kprobe

    kprobe_headers = [
        bpf_text_kprobe_header_open,
        bpf_text_kprobe_header_openat,
    ]
    if fnname_openat2:
        kprobe_headers.append(bpf_text_kprobe_header_openat2)

    for probe_header in kprobe_headers:
        bpf_text += probe_header
        bpf_text += bpf_text_kprobe_body
323
# Work out the C text for each filter marker, then substitute it. An empty
# string simply removes the marker.
if args.tid:  # TID trumps PID
    pid_tid_filter = 'if (tid != %s) { return 0; }' % args.tid
elif args.pid:
    pid_tid_filter = 'if (pid != %s) { return 0; }' % args.pid
else:
    pid_tid_filter = ''
bpf_text = bpf_text.replace('PID_TID_FILTER', pid_tid_filter)

if args.uid:
    uid_filter = 'if (uid != %s) { return 0; }' % args.uid
else:
    uid_filter = ''
bpf_text = bpf_text.replace('UID_FILTER', uid_filter)

# The cgroup / mount-namespace helper code goes in front of everything else.
bpf_text = filter_by_containers(args) + bpf_text

if args.flag_filter:
    flags_filter = 'if (!(flags & %d)) { return 0; }' % flag_filter_mask
else:
    flags_filter = ''
bpf_text = bpf_text.replace('FLAGS_FILTER', flags_filter)

# The flags member is only kept when it is displayed (-e) or filtered on
# (-f); otherwise every line tagged EXTENDED_STRUCT_MEMBER is dropped.
if not args.extended_fields and not args.flag_filter:
    kept_lines = [line for line in bpf_text.split('\n')
                  if 'EXTENDED_STRUCT_MEMBER' not in line]
    bpf_text = '\n'.join(kept_lines)
346
# Choose the C snippet that replaces every SUBMIT_DATA marker.
if args.full_path:
    # -F: send the name given to the syscall as an EVENT_ENTRY record; for a
    # relative name also send one EVENT_ENTRY per directory name while
    # walking up from the current working directory (bounded by
    # MAX_ENTRIES), and finish with an EVENT_END record.
    submit_snippet = """
    data.type = EVENT_ENTRY;
    events.perf_submit(ctx, &data, sizeof(data));

    if (data.name[0] != '/') { // relative path
        struct task_struct *task;
        struct dentry *dentry;
        int i;

        task = (struct task_struct *)bpf_get_current_task_btf();
        dentry = task->fs->pwd.dentry;

        for (i = 1; i < MAX_ENTRIES; i++) {
            bpf_probe_read_kernel(&data.name, sizeof(data.name), (void *)dentry->d_name.name);
            data.type = EVENT_ENTRY;
            events.perf_submit(ctx, &data, sizeof(data));

            if (dentry == dentry->d_parent) { // root directory
                break;
            }

            dentry = dentry->d_parent;
        }
    }

    data.type = EVENT_END;
    events.perf_submit(ctx, &data, sizeof(data));
    """
else:
    # Default: a single record per open.
    submit_snippet = """
    events.perf_submit(ctx, &data, sizeof(data));
    """
bpf_text = bpf_text.replace('SUBMIT_DATA', submit_snippet)

# Dump the final program when debugging; --ebpf additionally stops here
# without loading anything.
if args.ebpf or debug:
    print(bpf_text)
if args.ebpf:
    exit()
385
# initialize BPF
b = BPF(text=bpf_text)
if not is_support_kfunc:
    # Each traced syscall gets its own entry handler and shares the
    # trace_return handler on exit. openat2 is attached only when its
    # symbol was found.
    kprobe_targets = [
        (fnname_open, "syscall__trace_entry_open"),
        (fnname_openat, "syscall__trace_entry_openat"),
    ]
    if fnname_openat2:
        kprobe_targets.append((fnname_openat2, "syscall__trace_entry_openat2"))

    for syscall_symbol, entry_handler in kprobe_targets:
        b.attach_kprobe(event=syscall_symbol, fn_name=entry_handler)
        b.attach_kretprobe(event=syscall_symbol, fn_name="trace_return")
398
# Timestamp of the first reported event; print_event measures the TIME(s)
# column relative to it. Zero means "not seen yet".
initial_ts = 0

# header
# Build the heading from the same optional columns print_event emits.
header_columns = []
if args.timestamp:
    header_columns.append("%-14s" % "TIME(s)")
if args.print_uid:
    header_columns.append("%-6s" % "UID")
id_heading = "TID" if args.tid else "PID"
header_columns.append("%-6s %-16s %4s %3s " % (id_heading, "COMM", "FD", "ERR"))
if args.extended_fields:
    header_columns.append("%-9s" % "FLAGS")
header_columns.append("PATH")
print("".join(header_columns))
411
class EventType(object):
    """Record kinds carried in data_t.type when tracing with -F.

    The numbers follow the declaration order of ``enum event_type`` in the
    embedded C program.
    """
    EVENT_ENTRY, EVENT_END = 0, 1
415
# Path components buffered per thread (keyed by the event's pid_tgid id)
# while the EVENT_END record of a -F trace is still outstanding.
entries = defaultdict(list)

# process event
def print_event(cpu, data, size):
    """Perf-buffer callback: print one output line per traced open.

    Without -F every record is complete and is printed as it arrives.

    With -F the BPF program sends a run of EVENT_ENTRY records (first the
    name handed to the syscall, then, for a relative name, directory names
    walking up from the current working directory) followed by a single
    EVENT_END record. The components are collected in ``entries`` and
    joined, in reverse order, when EVENT_END arrives.

    Args:
        cpu, size: supplied by the perf buffer callback interface; unused.
        data: raw record, decoded through ``b["events"].event``.
    """
    global initial_ts

    event = b["events"].event(data)

    if args.full_path and event.type != EventType.EVENT_END:
        # Intermediate record: remember the component and wait for the end.
        if event.type == EventType.EVENT_ENTRY:
            entries[event.id].append(event.name)
        return

    # The first completed event defines time zero, even if it is filtered
    # out below.
    if not initial_ts:
        initial_ts = event.ts

    # Take the buffered components out of the table right away so the state
    # for this thread is released whether or not a line is printed. pop()
    # with a default neither raises nor creates an entry when none exists.
    paths = entries.pop(event.id, []) if args.full_path else []

    if args.failed and event.ret >= 0:
        return
    if args.name and bytes(args.name) not in event.comm:
        return

    # split return value into FD and errno columns
    if event.ret >= 0:
        fd_s, err = event.ret, 0
    else:
        fd_s, err = -1, -event.ret

    if args.timestamp:
        delta = event.ts - initial_ts
        printb(b"%-14.9f" % (float(delta) / 1000000), nl="")

    if args.print_uid:
        printb(b"%-6d" % event.uid, nl="")

    printb(b"%-6d %-16s %4d %3d " %
           (event.id & 0xffffffff if args.tid else event.id >> 32,
            event.comm, fd_s, err), nl="")

    if args.extended_fields:
        printb(b"%08o " % event.flags, nl="")

    if paths:
        # Components were received leaf first; join them root first.
        paths.reverse()
        path = os.path.join(*paths)
    else:
        # Either -F is off, or no EVENT_ENTRY record reached us for this
        # EVENT_END (os.path.join() cannot be called with no components).
        # Fall back to the name carried by the record itself, best effort.
        path = event.name
    printb(b"%s" % path)
472
# loop with callback to print_event
b["events"].open_perf_buffer(print_event, page_cnt=args.buffer_pages)
start_time = datetime.now()

# Longest single poll, in milliseconds, while a -d deadline is in effect.
# Keeping it small also keeps the value well inside the range of a C int.
max_poll_ms = 1000

while True:
    if args.duration:
        # Poll with a bounded timeout so the deadline is honoured even when
        # no event arrives (for example when the filters match nothing);
        # an untimed poll would block until the next event.
        remaining = args.duration - (datetime.now() - start_time)
        if remaining <= timedelta(0):
            break
        remaining_ms = int(remaining.total_seconds() * 1000)
        poll_timeout_ms = min(max_poll_ms, max(1, remaining_ms))
    else:
        # No deadline: block until events are available.
        poll_timeout_ms = -1

    try:
        b.perf_buffer_poll(timeout=poll_timeout_ms)
    except KeyboardInterrupt:
        exit()
481