#!/usr/bin/env python
# @lint-avoid-python-3-compatibility-imports
#
# opensnoop Trace open() syscalls.
#           For Linux, uses BCC, eBPF. Embedded C.
#
# USAGE: opensnoop [-h] [-T] [-U] [-x] [-p PID] [-t TID]
#                  [--cgroupmap CGROUPMAP] [--mntnsmap MNTNSMAP] [-u UID]
#                  [-d DURATION] [-n NAME] [-F] [-e] [-f FLAG_FILTER]
#                  [-b BUFFER_PAGES]
#
# Copyright (c) 2015 Brendan Gregg.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 17-Sep-2015   Brendan Gregg   Created this.
# 29-Apr-2016   Allan McAleavy  Updated for BPF_PERF_OUTPUT.
# 08-Oct-2016   Dina Goldshtein Support filtering by PID and TID.
# 28-Dec-2018   Tim Douglas     Print flags argument, enable filtering
# 06-Jan-2019   Takuma Kume     Support filtering by UID
# 21-Aug-2022   Rocky Xing      Support showing full path for an open file.
# 06-Sep-2022   Rocky Xing      Support setting size of the perf ring buffer.

from __future__ import print_function
from bcc import ArgString, BPF
from bcc.containers import filter_by_containers
from bcc.utils import printb
import argparse
from collections import defaultdict
from datetime import datetime, timedelta
import os

# arguments
# The examples text is shown verbatim as the --help epilog
# (RawDescriptionHelpFormatter keeps its line breaks).
examples = """examples:
    ./opensnoop                        # trace all open() syscalls
    ./opensnoop -T                     # include timestamps
    ./opensnoop -U                     # include UID
    ./opensnoop -x                     # only show failed opens
    ./opensnoop -p 181                 # only trace PID 181
    ./opensnoop -t 123                 # only trace TID 123
    ./opensnoop -u 1000                # only trace UID 1000
    ./opensnoop -d 10                  # trace for 10 seconds only
    ./opensnoop -n main                # only print process names containing "main"
    ./opensnoop -e                     # show extended fields
    ./opensnoop -f O_WRONLY -f O_RDWR  # only print calls for writing
    ./opensnoop -F                     # show full path for an open file with relative path
    ./opensnoop --cgroupmap mappath    # only trace cgroups in this BPF map
    ./opensnoop --mntnsmap mappath     # only trace mount namespaces in the map
"""
parser = argparse.ArgumentParser(
    description="Trace open() syscalls",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-T", "--timestamp", action="store_true",
    help="include timestamp on output")
parser.add_argument("-U", "--print-uid", action="store_true",
    help="print UID column")
parser.add_argument("-x", "--failed", action="store_true",
    help="only show failed opens")
# -p/-t/-u are kept as strings: they are only tested for truthiness and then
# pasted into the generated C filter expressions further down.
parser.add_argument("-p", "--pid",
    help="trace this PID only")
parser.add_argument("-t", "--tid",
    help="trace this TID only")
parser.add_argument("--cgroupmap",
    help="trace cgroups in this BPF map only")
parser.add_argument("--mntnsmap",
    help="trace mount namespaces in this BPF map only")
parser.add_argument("-u", "--uid",
    help="trace this UID only")
parser.add_argument("-d", "--duration",
    help="total duration of trace in seconds")
parser.add_argument("-n", "--name",
    type=ArgString,
    help="only print process names containing this name")
# --ebpf is hidden from --help; when given, the generated program is printed
# and the script exits without tracing.
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)
parser.add_argument("-e", "--extended_fields", action="store_true",
    help="show extended fields")
parser.add_argument("-f", "--flag_filter", action="append",
    help="filter on flags argument (e.g., O_WRONLY)")
parser.add_argument("-F", "--full-path", action="store_true",
    help="show full path for an open file with relative path")
parser.add_argument("-b", "--buffer-pages", type=int, default=64,
    help="size of the perf ring buffer "
        "(must be a power of two number of pages and defaults to 64)")
args = parser.parse_args()
# Set to a non-zero value to print the generated BPF program before loading.
debug = 0
# Convert the -d string into a timedelta so it can be compared against
# elapsed wall-clock time in the poll loop.
if args.duration:
    args.duration = timedelta(seconds=int(args.duration))
# OR together the numeric values of every -f flag name, looked up on the os
# module. Names that do not start with "O_" or are not defined by os abort
# the script with "Bad flag".
flag_filter_mask = 0
for flag in args.flag_filter or []:
    if not flag.startswith('O_'):
        exit("Bad flag: %s" % flag)
    try:
        flag_filter_mask |= getattr(os, flag)
    except AttributeError:
        exit("Bad flag: %s" % flag)

# define BPF program
#
# The C source is assembled from the fragments below by plain string
# concatenation and substitution. Placeholder tokens replaced later:
#   FNNAME                     - syscall function name (kfunc headers)
#   PID_TID_FILTER, UID_FILTER,
#   FLAGS_FILTER               - optional early-return filter statements
#   SUBMIT_DATA                - perf_submit sequence (differs with -F)
# Any line containing EXTENDED_STRUCT_MEMBER is removed from the final text
# unless -e or -f is given, so each such marker must stay on the same line
# as the statement it guards.
#
# Common prologue: includes, the per-call scratch struct (val_t), the event
# record sent to user space (data_t) and the perf output channel.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <uapi/linux/limits.h>
#include <linux/sched.h>
#ifdef FULLPATH
#include <linux/fs_struct.h>
#include <linux/dcache.h>

#define MAX_ENTRIES 32

enum event_type {
    EVENT_ENTRY,
    EVENT_END,
};
#endif

struct val_t {
    u64 id;
    char comm[TASK_COMM_LEN];
    const char *fname;
    int flags; // EXTENDED_STRUCT_MEMBER
};

struct data_t {
    u64 id;
    u64 ts;
    u32 uid;
    int ret;
    char comm[TASK_COMM_LEN];
#ifdef FULLPATH
    enum event_type type;
#endif
    char name[NAME_MAX];
    int flags; // EXTENDED_STRUCT_MEMBER
};

BPF_PERF_OUTPUT(events);
"""

# kprobe variant, return side: looks up the val_t stored by the entry probe
# under the current pid_tgid, builds the data_t event, submits it and removes
# the stored entry. Shared by all traced syscalls.
bpf_text_kprobe = """
BPF_HASH(infotmp, u64, struct val_t);

int trace_return(struct pt_regs *ctx)
{
    u64 id = bpf_get_current_pid_tgid();
    struct val_t *valp;
    struct data_t data = {};

    u64 tsp = bpf_ktime_get_ns();

    valp = infotmp.lookup(&id);
    if (valp == 0) {
        // missed entry
        return 0;
    }

    bpf_probe_read_kernel(&data.comm, sizeof(data.comm), valp->comm);
    bpf_probe_read_user_str(&data.name, sizeof(data.name), (void *)valp->fname);
    data.id = valp->id;
    data.ts = tsp / 1000;
    data.uid = bpf_get_current_uid_gid();
    data.flags = valp->flags; // EXTENDED_STRUCT_MEMBER
    data.ret = PT_REGS_RC(ctx);

    SUBMIT_DATA

    infotmp.delete(&id);

    return 0;
}
"""

# kprobe variant, entry side: one function header per syscall. Each header
# is left open and is completed by appending bpf_text_kprobe_body.
bpf_text_kprobe_header_open = """
int syscall__trace_entry_open(struct pt_regs *ctx, const char __user *filename, int flags)
{
"""

bpf_text_kprobe_header_openat = """
int syscall__trace_entry_openat(struct pt_regs *ctx, int dfd, const char __user *filename, int flags)
{
"""

# openat2 passes its flags inside struct open_how, so the header extracts
# them into a local named `flags` for the shared body to use.
bpf_text_kprobe_header_openat2 = """
#include <uapi/linux/openat2.h>
int syscall__trace_entry_openat2(struct pt_regs *ctx, int dfd, const char __user *filename, struct open_how *how)
{
    int flags = how->flags;
"""

# Shared entry-probe body: applies the filters, then stores comm, filename
# pointer and flags in infotmp keyed by pid_tgid for trace_return to pick up.
bpf_text_kprobe_body = """
    struct val_t val = {};
    u64 id = bpf_get_current_pid_tgid();
    u32 pid = id >> 32; // PID is higher part
    u32 tid = id; // Cast and get the lower part
    u32 uid = bpf_get_current_uid_gid();

    PID_TID_FILTER
    UID_FILTER
    FLAGS_FILTER

    if (container_should_be_filtered()) {
        return 0;
    }

    if (bpf_get_current_comm(&val.comm, sizeof(val.comm)) == 0) {
        val.id = id;
        val.fname = filename;
        val.flags = flags; // EXTENDED_STRUCT_MEMBER
        infotmp.update(&id, &val);
    }

    return 0;
};
"""

# kfunc variant: a single KRETFUNC_PROBE per syscall sees both the arguments
# and the return value. Each header provides `filename`, `flags` and `ret`
# and is left open, to be completed by appending bpf_text_kfunc_body.
# When CONFIG_ARCH_HAS_SYSCALL_WRAPPER is defined (and not on s390x) the
# arguments are pulled out of the pt_regs the probe receives.
bpf_text_kfunc_header_open = """
#if defined(CONFIG_ARCH_HAS_SYSCALL_WRAPPER) && !defined(__s390x__)
KRETFUNC_PROBE(FNNAME, struct pt_regs *regs, int ret)
{
    const char __user *filename = (char *)PT_REGS_PARM1(regs);
    int flags = PT_REGS_PARM2(regs);
#else
KRETFUNC_PROBE(FNNAME, const char __user *filename, int flags, int ret)
{
#endif
"""

bpf_text_kfunc_header_openat = """
#if defined(CONFIG_ARCH_HAS_SYSCALL_WRAPPER) && !defined(__s390x__)
KRETFUNC_PROBE(FNNAME, struct pt_regs *regs, int ret)
{
    int dfd = PT_REGS_PARM1(regs);
    const char __user *filename = (char *)PT_REGS_PARM2(regs);
    int flags = PT_REGS_PARM3(regs);
#else
KRETFUNC_PROBE(FNNAME, int dfd, const char __user *filename, int flags, int ret)
{
#endif
"""

# In the syscall-wrapper case struct open_how is copied in with
# bpf_probe_read_user before its flags field is read.
bpf_text_kfunc_header_openat2 = """
#include <uapi/linux/openat2.h>
#if defined(CONFIG_ARCH_HAS_SYSCALL_WRAPPER) && !defined(__s390x__)
KRETFUNC_PROBE(FNNAME, struct pt_regs *regs, int ret)
{
    int dfd = PT_REGS_PARM1(regs);
    const char __user *filename = (char *)PT_REGS_PARM2(regs);
    struct open_how __user how;
    int flags;

    bpf_probe_read_user(&how, sizeof(struct open_how), (struct open_how*)PT_REGS_PARM3(regs));
    flags = how.flags;
#else
KRETFUNC_PROBE(FNNAME, int dfd, const char __user *filename, struct open_how __user *how, int ret)
{
    int flags = how->flags;
#endif
"""
# Shared body for every kfunc (KRETFUNC_PROBE) header: applies the filters,
# fills a data_t event from the probe arguments and submits it. The header
# it is appended to must define `filename`, `flags` and `ret`.
bpf_text_kfunc_body = """
    u64 id = bpf_get_current_pid_tgid();
    u32 pid = id >> 32; // PID is higher part
    u32 tid = id; // Cast and get the lower part
    u32 uid = bpf_get_current_uid_gid();

    PID_TID_FILTER
    UID_FILTER
    FLAGS_FILTER
    if (container_should_be_filtered()) {
        return 0;
    }

    struct data_t data = {};
    bpf_get_current_comm(&data.comm, sizeof(data.comm));

    u64 tsp = bpf_ktime_get_ns();

    bpf_probe_read_user_str(&data.name, sizeof(data.name), (void *)filename);
    data.id = id;
    data.ts = tsp / 1000;
    data.uid = bpf_get_current_uid_gid();
    data.flags = flags; // EXTENDED_STRUCT_MEMBER
    data.ret = ret;

    SUBMIT_DATA

    return 0;
}
"""

# An empty program is loaded first only to query the syscall symbol prefix
# and to check whether openat2 exists on this kernel.
b = BPF(text='')
syscall_prefix = b.get_syscall_prefix().decode()
# open and openat are always in place since 2.6.16
fnname_open = syscall_prefix + 'open'
fnname_openat = syscall_prefix + 'openat'
fnname_openat2 = syscall_prefix + 'openat2'
if b.ksymname(fnname_openat2) == -1:
    fnname_openat2 = None

# -F enables the #ifdef FULLPATH sections of the C prologue.
if args.full_path:
    bpf_text = "#define FULLPATH\n" + bpf_text

# Assemble one probe function per syscall: header + shared body.
is_support_kfunc = BPF.support_kfunc()
if is_support_kfunc:
    bpf_text += bpf_text_kfunc_header_open.replace('FNNAME', fnname_open)
    bpf_text += bpf_text_kfunc_body

    bpf_text += bpf_text_kfunc_header_openat.replace('FNNAME', fnname_openat)
    bpf_text += bpf_text_kfunc_body

    if fnname_openat2:
        bpf_text += bpf_text_kfunc_header_openat2.replace('FNNAME', fnname_openat2)
        bpf_text += bpf_text_kfunc_body
else:
    bpf_text += bpf_text_kprobe

    bpf_text += bpf_text_kprobe_header_open
    bpf_text += bpf_text_kprobe_body

    bpf_text += bpf_text_kprobe_header_openat
    bpf_text += bpf_text_kprobe_body

    if fnname_openat2:
        bpf_text += bpf_text_kprobe_header_openat2
        bpf_text += bpf_text_kprobe_body

# Substitute the filter placeholders with C early-return statements, or with
# nothing when the corresponding option was not given.
if args.tid:  # TID trumps PID
    bpf_text = bpf_text.replace('PID_TID_FILTER',
        'if (tid != %s) { return 0; }' % args.tid)
elif args.pid:
    bpf_text = bpf_text.replace('PID_TID_FILTER',
        'if (pid != %s) { return 0; }' % args.pid)
else:
    bpf_text = bpf_text.replace('PID_TID_FILTER', '')
if args.uid:
    bpf_text = bpf_text.replace('UID_FILTER',
        'if (uid != %s) { return 0; }' % args.uid)
else:
    bpf_text = bpf_text.replace('UID_FILTER', '')
bpf_text = filter_by_containers(args) + bpf_text
if args.flag_filter:
    bpf_text = bpf_text.replace('FLAGS_FILTER',
        'if (!(flags & %d)) { return 0; }' % flag_filter_mask)
else:
    bpf_text = bpf_text.replace('FLAGS_FILTER', '')
# The flags member is only compiled in when it is displayed (-e) or needed
# by the flag filter (-f); otherwise every tagged line is dropped.
if not (args.extended_fields or args.flag_filter):
    bpf_text = '\n'.join(x for x in bpf_text.split('\n')
        if 'EXTENDED_STRUCT_MEMBER' not in x)

if args.full_path:
    # With -F each open produces a stream of events: the name as passed to
    # the syscall, then (for relative names) one event per directory
    # component walking from the task's cwd up to the root, and finally an
    # EVENT_END marker. User space stitches them back together.
    bpf_text = bpf_text.replace('SUBMIT_DATA', """
    data.type = EVENT_ENTRY;
    events.perf_submit(ctx, &data, sizeof(data));

    if (data.name[0] != '/') { // relative path
        struct task_struct *task;
        struct dentry *dentry;
        int i;

        task = (struct task_struct *)bpf_get_current_task_btf();
        dentry = task->fs->pwd.dentry;

        for (i = 1; i < MAX_ENTRIES; i++) {
            bpf_probe_read_kernel(&data.name, sizeof(data.name), (void *)dentry->d_name.name);
            data.type = EVENT_ENTRY;
            events.perf_submit(ctx, &data, sizeof(data));

            if (dentry == dentry->d_parent) { // root directory
                break;
            }

            dentry = dentry->d_parent;
        }
    }

    data.type = EVENT_END;
    events.perf_submit(ctx, &data, sizeof(data));
    """)
else:
    bpf_text = bpf_text.replace('SUBMIT_DATA', """
    events.perf_submit(ctx, &data, sizeof(data));
    """)

if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()

# initialize BPF
b = BPF(text=bpf_text)
# kfunc probes attach themselves by name; kprobes must be attached manually.
if not is_support_kfunc:
    b.attach_kprobe(event=fnname_open, fn_name="syscall__trace_entry_open")
    b.attach_kretprobe(event=fnname_open, fn_name="trace_return")

    b.attach_kprobe(event=fnname_openat, fn_name="syscall__trace_entry_openat")
    b.attach_kretprobe(event=fnname_openat, fn_name="trace_return")

    if fnname_openat2:
        b.attach_kprobe(event=fnname_openat2, fn_name="syscall__trace_entry_openat2")
        b.attach_kretprobe(event=fnname_openat2, fn_name="trace_return")

# Timestamp of the first event handled; the TIME(s) column is relative to it.
initial_ts = 0

# header
if args.timestamp:
    print("%-14s" % ("TIME(s)"), end="")
if args.print_uid:
    print("%-6s" % ("UID"), end="")
print("%-6s %-16s %4s %3s " %
      ("TID" if args.tid else "PID", "COMM", "FD", "ERR"), end="")
if args.extended_fields:
    print("%-9s" % ("FLAGS"), end="")
print("PATH")


class EventType(object):
    """Mirror of `enum event_type` in the BPF program (used with -F only)."""
    EVENT_ENTRY = 0
    EVENT_END = 1


# With -F: path components received so far, keyed by the event's pid_tgid id.
entries = defaultdict(list)


# process event
def print_event(cpu, data, size):
    """Perf-buffer callback: print one output row per completed open.

    Without -F every event is a complete record and is printed directly.
    With -F, EVENT_ENTRY events only accumulate path components in
    `entries`; the row is printed when the matching EVENT_END arrives, and
    the accumulated components are discarded at that point whether or not
    the row passed the -x / -n filters.

    cpu, data, size: supplied by the perf buffer; only `data` is used.
    """
    global initial_ts
    event = b["events"].event(data)

    paths = []
    if args.full_path:
        # event.type only exists when the program was built with FULLPATH,
        # so it must not be touched outside this branch.
        if event.type == EventType.EVENT_ENTRY:
            entries[event.id].append(event.name)
            return
        if event.type != EventType.EVENT_END:
            return
        # pop() both fetches and releases the components; it cannot raise
        # when no ENTRY event was seen for this id.
        paths = entries.pop(event.id, [])

    # split return value into FD and errno columns
    if event.ret >= 0:
        fd_s = event.ret
        err = 0
    else:
        fd_s = -1
        err = - event.ret

    # The time base is taken from the first event seen, even a filtered one.
    if not initial_ts:
        initial_ts = event.ts

    if args.failed and (event.ret >= 0):
        return
    if args.name and bytes(args.name) not in event.comm:
        return

    if args.timestamp:
        delta = event.ts - initial_ts
        printb(b"%-14.9f" % (float(delta) / 1000000), nl="")

    if args.print_uid:
        printb(b"%-6d" % event.uid, nl="")

    printb(b"%-6d %-16s %4d %3d " %
           (event.id & 0xffffffff if args.tid else event.id >> 32,
            event.comm, fd_s, err), nl="")

    if args.extended_fields:
        printb(b"%08o " % event.flags, nl="")

    if paths:
        # Components arrive leaf first (file name, cwd, ..., root), so
        # join them in reverse order.
        printb(b"%s" % os.path.join(*reversed(paths)))
    else:
        # Either -F is off, or the ENTRY events for this open never reached
        # us (os.path.join() with no arguments would raise TypeError). Fall
        # back to the name carried by this event.
        printb(b"%s" % event.name)


# loop with callback to print_event
b["events"].open_perf_buffer(print_event, page_cnt=args.buffer_pages)
start_time = datetime.now()
while True:
    if args.duration:
        remaining = args.duration - (datetime.now() - start_time)
        if remaining <= timedelta(0):
            break
        # Poll with a bounded timeout (milliseconds) so the deadline is
        # honoured even when no events arrive; capped at one second.
        poll_timeout_ms = min(1000, max(1, int(remaining.total_seconds() * 1000)))
    else:
        # No deadline: block until events arrive.
        poll_timeout_ms = -1
    try:
        b.perf_buffer_poll(timeout=poll_timeout_ms)
    except KeyboardInterrupt:
        exit()