#!/usr/bin/env python
#
# syscount   Summarize syscall counts and latencies.
#
# USAGE: syscount [-h] [-p PID] [-t TID] [-c PPID] [-i INTERVAL] [-d DURATION]
#                 [-T TOP] [-x] [-e ERRNO] [-L] [-m] [-P] [-l]
#                 [--syscall SYSCALL]
#
# Copyright 2017, Sasha Goldshtein.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 15-Feb-2017   Sasha Goldshtein    Created this.
# 16-May-2022   Rocky Xing          Added TID filter support.
# 26-Jul-2022   Rocky Xing          Added syscall filter support.

from time import sleep, strftime
import argparse
import errno
import itertools
import sys
import signal
from bcc import BPF
from bcc.utils import printb
from bcc.syscall import syscall_name, syscalls

# Python 2 names the function izip_longest, Python 3 names it zip_longest.
if sys.version_info.major < 3:
    izip_longest = itertools.izip_longest
else:
    izip_longest = itertools.zip_longest

# Key value that shows up occasionally in the "data" table and is never
# reported.
INVALID_KEY = 0xFFFFFFFF


# signal handler
def signal_ignore(signal, frame):
    """SIGINT handler installed after the first Ctrl+C.

    It only prints an empty line, so a second Ctrl+C cannot interrupt the
    final summary.
    """
    print()


def handle_errno(errstr):
    """argparse ``type=`` callback: convert *errstr* to a positive errno.

    Accepts a (possibly negative) decimal number, or the symbolic name of a
    constant in the ``errno`` module such as ``EPERM``.

    Raises:
        argparse.ArgumentTypeError: if *errstr* is neither a number nor the
            name of an integer constant in ``errno``.
    """
    try:
        return abs(int(errstr))
    except ValueError:
        pass

    # The errno module also exposes non-integer attributes (errorcode,
    # __name__, ...); accepting those would only fail later when the value is
    # formatted with %d, so reject them here with a proper usage error.
    value = getattr(errno, errstr, None)
    if not isinstance(value, int):
        raise argparse.ArgumentTypeError(
            "couldn't map %s to an errno" % errstr)
    return value


parser = argparse.ArgumentParser(
    description="Summarize syscall counts and latencies.")
parser.add_argument("-p", "--pid", type=int,
    help="trace only this pid")
parser.add_argument("-t", "--tid", type=int,
    help="trace only this tid")
parser.add_argument("-c", "--ppid", type=int,
    help="trace only child of this pid")
parser.add_argument("-i", "--interval", type=int,
    help="print summary at this interval (seconds)")
parser.add_argument("-d", "--duration", type=int,
    help="total duration of trace, in seconds")
parser.add_argument("-T", "--top", type=int, default=10,
    help="print only the top syscalls by count or latency")
parser.add_argument("-x", "--failures", action="store_true",
    help="trace only failed syscalls (return < 0)")
parser.add_argument("-e", "--errno", type=handle_errno,
    help="trace only syscalls that return this error (numeric or EPERM, etc.)")
parser.add_argument("-L", "--latency", action="store_true",
    help="collect syscall latency")
parser.add_argument("-m", "--milliseconds", action="store_true",
    help="display latency in milliseconds (default: microseconds)")
parser.add_argument("-P", "--process", action="store_true",
    help="count by process and not by syscall")
parser.add_argument("-l", "--list", action="store_true",
    help="print list of recognized syscalls and exit")
parser.add_argument("--syscall", type=str,
    help="trace this syscall only (use option -l to get all recognized syscalls)")
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)
args = parser.parse_args()
# With only a duration given, print a single summary when it elapses.
if args.duration and not args.interval:
    args.interval = args.duration
# No interval at all: sleep "forever" and report on Ctrl+C.
if not args.interval:
    args.interval = 99999999

# Resolve --syscall to its number; the table maps number -> name (bytes).
syscall_nr = -1
if args.syscall is not None:
    # str.encode works on both Python 2 and 3; bytes(s, 'utf-8') is
    # Python 3 only.
    syscall = args.syscall.encode('utf-8')
    for key, value in syscalls.items():
        if syscall == value:
            syscall_nr = key
            break
    if syscall_nr == -1:
        print("Error: syscall '%s' not found. Exiting." % args.syscall)
        sys.exit(1)

if args.list:
    # Print the sorted syscall names four per row; the same iterator repeated
    # four times makes izip_longest hand out consecutive groups of four.
    for grp in izip_longest(*(iter(sorted(syscalls.values())),) * 4):
        print(" ".join(["%-22s" % s.decode() for s in grp if s is not None]))
    sys.exit(0)

text = """
#include <linux/sched.h>

#ifdef LATENCY
struct data_t {
    u64 count;
    u64 total_ns;
};

BPF_HASH(start, u64, u64);
BPF_HASH(data, u32, struct data_t);
#else
BPF_HASH(data, u32, u64);
#endif

#ifdef LATENCY
TRACEPOINT_PROBE(raw_syscalls, sys_enter) {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;

#ifdef FILTER_SYSCALL_NR
    if (args->id != FILTER_SYSCALL_NR)
        return 0;
#endif

#ifdef FILTER_PID
    if (pid != FILTER_PID)
        return 0;
#endif

#ifdef FILTER_TID
    if (tid != FILTER_TID)
        return 0;
#endif

#ifdef FILTER_PPID
    struct task_struct *task = (struct task_struct *)bpf_get_current_task();
    u32 ppid = task->real_parent->tgid;
    if (ppid != FILTER_PPID)
        return 0;
#endif

    u64 t = bpf_ktime_get_ns();
    start.update(&pid_tgid, &t);
    return 0;
}
#endif

TRACEPOINT_PROBE(raw_syscalls, sys_exit) {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;

#ifdef FILTER_SYSCALL_NR
    if (args->id != FILTER_SYSCALL_NR)
        return 0;
#endif

#ifdef FILTER_PID
    if (pid != FILTER_PID)
        return 0;
#endif

#ifdef FILTER_TID
    if (tid != FILTER_TID)
        return 0;
#endif

#ifdef FILTER_PPID
    struct task_struct *task = (struct task_struct *)bpf_get_current_task();
    u32 ppid = task->real_parent->tgid;
    if (ppid != FILTER_PPID)
        return 0;
#endif

#ifdef FILTER_FAILED
    if (args->ret >= 0)
        return 0;
#endif

#ifdef FILTER_ERRNO
    if (args->ret != -FILTER_ERRNO)
        return 0;
#endif

#ifdef BY_PROCESS
    u32 key = pid_tgid >> 32;
#else
    u32 key = args->id;
#endif

#ifdef LATENCY
    struct data_t *val, zero = {};
    u64 *start_ns = start.lookup(&pid_tgid);
    if (!start_ns)
        return 0;

    val = data.lookup_or_try_init(&key, &zero);
    if (val) {
        lock_xadd(&val->count, 1);
        lock_xadd(&val->total_ns, bpf_ktime_get_ns() - *start_ns);
    }
#else
    u64 *val, zero = 0;
    val = data.lookup_or_try_init(&key, &zero);
    if (val) {
        lock_xadd(val, 1);
    }
#endif
    return 0;
}
"""

# Turn the command-line options into #defines prepended to the BPF program.
# Only one of the pid / tid / ppid filters is applied, in that priority order.
if args.pid:
    text = ("#define FILTER_PID %d\n" % args.pid) + text
elif args.tid:
    text = ("#define FILTER_TID %d\n" % args.tid) + text
elif args.ppid:
    text = ("#define FILTER_PPID %d\n" % args.ppid) + text
if args.failures:
    text = "#define FILTER_FAILED\n" + text
if args.errno:
    text = "#define FILTER_ERRNO %d\n" % abs(args.errno) + text
if args.latency:
    text = "#define LATENCY\n" + text
if args.process:
    text = "#define BY_PROCESS\n" + text
if args.syscall is not None:
    text = ("#define FILTER_SYSCALL_NR %d\n" % syscall_nr) + text
if args.ebpf:
    print(text)
    sys.exit()

bpf = BPF(text=text)


def print_stats():
    """Print one summary: latency table with -L, count table otherwise."""
    if args.latency:
        print_latency_stats()
    else:
        print_count_stats()


# Column headers; the PID header is padded to line up with the "%-6d %-15s"
# row format used by agg_colval().
agg_colname = "PID    COMM" if args.process else "SYSCALL"
time_colname = "TIME (ms)" if args.milliseconds else "TIME (us)"


def comm_for_pid(pid):
    """Return the contents of /proc/<pid>/comm as stripped bytes.

    Returns b"[unknown]" when the file cannot be read (for example because
    the process has already exited).
    """
    try:
        with open("/proc/%d/comm" % pid, "rb") as comm_file:
            return comm_file.read().strip()
    except Exception:
        return b"[unknown]"


def agg_colval(key):
    """Format a table key for the first output column.

    With -P the key is a pid and is rendered as "PID COMM"; otherwise it is a
    syscall number and is rendered via syscall_name().
    """
    if args.process:
        return b"%-6d %-15s" % (key.value, comm_for_pid(key.value))
    else:
        return syscall_name(key.value)


def _top_entries(table, weight):
    """Return the args.top entries of *table* with the largest weight(value).

    The invalid key is dropped before truncating, so it never takes up one of
    the requested rows.
    """
    rows = [(k, v) for k, v in table.items() if k.value != INVALID_KEY]
    rows.sort(key=lambda kv: -weight(kv[1]))
    return rows[:args.top]


def print_count_stats():
    """Print the top entries by call count, then reset the table."""
    data = bpf["data"]
    print("[%s]" % strftime("%H:%M:%S"))
    print("%-22s %8s" % (agg_colname, "COUNT"))
    for k, v in _top_entries(data, lambda count: count.value):
        printb(b"%-22s %8d" % (agg_colval(k), v.value))
    print("")
    data.clear()


def print_latency_stats():
    """Print the top entries by total time spent, then reset the table."""
    data = bpf["data"]
    print("[%s]" % strftime("%H:%M:%S"))
    print("%-22s %8s %16s" % (agg_colname, "COUNT", time_colname))
    # total_ns is in nanoseconds; scale to ms or us for display.
    time_fmt = b"%16.6f" if args.milliseconds else b"%16.3f"
    divisor = 1e6 if args.milliseconds else 1e3
    for k, v in _top_entries(data, lambda entry: entry.total_ns):
        printb((b"%-22s %8d " + time_fmt) %
               (agg_colval(k), v.count, v.total_ns / divisor))
    print("")
    data.clear()


if args.syscall is not None:
    print("Tracing %ssyscall '%s'... Ctrl+C to quit." %
        ("failed " if args.failures else "", args.syscall))
else:
    print("Tracing %ssyscalls, printing top %d... Ctrl+C to quit." %
        ("failed " if args.failures else "", args.top))
exiting = 0 if args.interval else 1
seconds = 0
while True:
    try:
        sleep(args.interval)
        seconds += args.interval
    except KeyboardInterrupt:
        exiting = 1
        # Further Ctrl+C presses must not abort the final summary below.
        signal.signal(signal.SIGINT, signal_ignore)
    if args.duration and seconds >= args.duration:
        exiting = 1

    print_stats()

    if exiting:
        print("Detaching...")
        sys.exit()