#!/usr/bin/env python
#
# stackcount    Count events and their stack traces.
#               For Linux, uses BCC, eBPF.
#
# USAGE: stackcount.py [-h] [-p PID] [-c CPU] [-i INTERVAL] [-D DURATION] [-T]
#                      [-r] [-s] [-P] [-K] [-U] [-v] [-d] [-f] [--debug]
#                      pattern
#
# The pattern is a string with optional '*' wildcards, similar to file
# globbing. If you'd prefer to use regular expressions, use the -r option.
#
# Copyright 2016 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 12-Jan-2016   Brendan Gregg     Created this.
# 09-Jul-2016   Sasha Goldshtein  Generalized for uprobes and tracepoints.

from __future__ import print_function
from bcc import BPF, USDT
from time import sleep, strftime
import argparse
import re
import signal
import sys
import traceback

# Set from --debug in Tool.__init__; when true the generated BPF program is
# printed before loading and full tracebacks are shown on failure.
debug = False


class Probe(object):
    """One probe specification and the BPF program that counts its stacks.

    Typical use is ``load()`` followed by ``attach()``; afterwards the
    compiled program is available as ``self.bpf`` and the number of attached
    probes as ``self.matched``.
    """

    def __init__(self, pattern, kernel_stack, user_stack, use_regex=False,
                 pid=None, per_pid=False, cpu=None):
        """Init a new probe.

        Init the probe from the pattern provided by the user. The supported
        patterns mimic the 'trace' and 'argdist' tools, but are simpler because
        we don't have to distinguish between probes and retprobes.

            func            -- probe a kernel function
            lib:func        -- probe a user-space function in the library 'lib'
            p::func         -- same thing as 'func'
            p:lib:func      -- same thing as 'lib:func'
            t:cat:event     -- probe a kernel tracepoint
            u:lib:probe     -- probe a USDT tracepoint

        Args:
            pattern: probe specification in one of the forms above.
            kernel_stack: collect the kernel stack for each event.
            user_stack: collect the user stack for each event.
            use_regex: treat the function part of the pattern as a regular
                expression instead of a '*' glob.
            pid: restrict tracing to this process ID, or None for all.
            per_pid: key counts by process (tgid and comm) instead of
                aggregating all processes together.
            cpu: restrict kernel probes to this CPU, or None for all.

        Raises:
            Exception: if the pattern is malformed or the named library or
                executable cannot be found.
        """
        self.kernel_stack = kernel_stack
        self.user_stack = user_stack
        parts = pattern.split(':')
        if len(parts) == 1:
            parts = ["p", "", parts[0]]
        elif len(parts) == 2:
            parts = ["p", parts[0], parts[1]]
        elif len(parts) == 3:
            if parts[0] == "t":
                # Tracepoints are matched as a single "category:event" name.
                parts = ["t", "", "%s:%s" % tuple(parts[1:])]
            if parts[0] not in ["p", "t", "u"]:
                raise Exception("Type must be 'p', 't', or 'u', but got %s" %
                                parts[0])
        else:
            raise Exception("Too many ':'-separated components in pattern %s" %
                            pattern)

        (self.type, self.library, self.pattern) = parts
        if not use_regex:
            # Translate the glob into an anchored regular expression.
            self.pattern = self.pattern.replace('*', '.*')
            self.pattern = '^' + self.pattern + '$'

        if (self.type == "p" and self.library) or self.type == "u":
            libpath = BPF.find_library(self.library)
            if libpath is None:
                # This might be an executable (e.g. 'bash')
                libpath = BPF.find_exe(self.library)
            if not libpath:
                raise Exception("unable to find library %s" % self.library)
            self.library = libpath

        self.pid = pid
        self.per_pid = per_pid
        self.cpu = cpu
        self.matched = 0

    def is_kernel_probe(self):
        """Return True for tracepoints and for kprobes (no library given)."""
        return self.type == "t" or (self.type == "p" and self.library == "")

    def attach(self):
        """Attach the loaded program and record how many probes matched.

        Must be called after ``load()``.

        Raises:
            Exception: if the pattern matched nothing.
        """
        if self.type == "p":
            if self.library:
                self.bpf.attach_uprobe(name=self.library,
                                       sym_re=self.pattern,
                                       fn_name="trace_count",
                                       pid=self.pid or -1)
                self.matched = self.bpf.num_open_uprobes()
            else:
                self.bpf.attach_kprobe(event_re=self.pattern,
                                       fn_name="trace_count")
                self.matched = self.bpf.num_open_kprobes()
        elif self.type == "t":
            self.bpf.attach_tracepoint(tp_re=self.pattern,
                                       fn_name="trace_count")
            self.matched = self.bpf.num_open_tracepoints()
        elif self.type == "u":
            pass    # Nothing to do -- attach already happened in `load`

        if self.matched == 0:
            raise Exception("No functions matched by pattern %s" %
                            self.pattern)

    def load(self):
        """Generate the BPF C program for this probe and compile it.

        Sets ``self.bpf`` to the compiled program and ``self.usdt`` to the
        USDT context (None for non-USDT probes). For USDT probes the matching
        probes are enabled here and counted in ``self.matched``.
        """
        ctx_name = "ctx"
        stack_trace = ""
        if self.user_stack:
            stack_trace += """
                    key.user_stack_id = stack_traces.get_stackid(
                      %s, BPF_F_USER_STACK
                    );""" % (ctx_name)
        else:
            stack_trace += "key.user_stack_id = -1;"
        if self.kernel_stack:
            stack_trace += """
                    key.kernel_stack_id = stack_traces.get_stackid(
                      %s, 0
                    );""" % (ctx_name)
        else:
            stack_trace += "key.kernel_stack_id = -1;"

        trace_count_text = """
int trace_count(void *ctx) {
    FILTER
    struct key_t key = {};
    key.tgid = GET_TGID;
    STORE_COMM
    %s
    counts.atomic_increment(key);
    return 0;
}
        """
        trace_count_text = trace_count_text % (stack_trace)

        bpf_text = """#include <uapi/linux/ptrace.h>
#include <linux/sched.h>

struct key_t {
    // no pid (thread ID) so that we do not needlessly split this key
    u32 tgid;
    int kernel_stack_id;
    int user_stack_id;
    char name[TASK_COMM_LEN];
};

BPF_HASH(counts, struct key_t);
BPF_STACK_TRACE(stack_traces, 1024);
        """

        filter_text = []
        # We really mean the tgid from the kernel's perspective, which is in
        # the top 32 bits of bpf_get_current_pid_tgid().
        if self.is_kernel_probe() and self.pid:
            filter_text.append('u32 pid; pid = bpf_get_current_pid_tgid() >> 32; ' +
                               'if (pid != %d) { return 0; }' % self.pid)

        # Compare against None explicitly: CPU 0 is a valid CPU to filter on.
        if self.is_kernel_probe() and self.cpu is not None:
            filter_text.append('struct task_struct *task; task = (struct task_struct*)bpf_get_current_task(); ' +
                               'if (task->cpu != %d) { return 0; }' % self.cpu)

        trace_count_text = trace_count_text.replace('FILTER', '\n '.join(filter_text))

        # Do per-pid statistics iff -P is provided
        if self.per_pid:
            trace_count_text = trace_count_text.replace(
                'GET_TGID', 'bpf_get_current_pid_tgid() >> 32')
            trace_count_text = trace_count_text.replace(
                'STORE_COMM',
                'bpf_get_current_comm(&key.name, sizeof(key.name));')
        else:
            # skip splitting on PID so these aggregate
            # together, and don't store the process name.
            trace_count_text = trace_count_text.replace(
                'GET_TGID', '0xffffffff')
            trace_count_text = trace_count_text.replace('STORE_COMM', '')

        self.usdt = None
        if self.type == "u":
            self.usdt = USDT(path=self.library, pid=self.pid)
            for probe in self.usdt.enumerate_probes():
                if not self.pid and (probe.bin_path != self.library):
                    continue
                if re.match(self.pattern, probe.name):
                    # This hack is required because the bpf_usdt_readarg
                    # functions generated need different function names for
                    # each attached probe. If we just stick to trace_count,
                    # we'd get multiple bpf_usdt_readarg helpers with the same
                    # name when enabling more than one USDT probe.
                    new_func = "trace_count_%d" % self.matched
                    bpf_text += trace_count_text.replace(
                        "trace_count", new_func)
                    self.usdt.enable_probe(probe.name, new_func)
                    self.matched += 1
            if debug:
                print(self.usdt.get_text())
        else:
            bpf_text += trace_count_text

        if debug:
            print(bpf_text)
        self.bpf = BPF(text=bpf_text,
                       usdt_contexts=[self.usdt] if self.usdt else [])


class Tool(object):
    """Command-line front end: parses arguments, runs the probe, prints."""

    def __init__(self):
        """Parse the command line and build the Probe it describes.

        Exits the process if -K and -U are both given.
        """
        examples = """examples:
    ./stackcount submit_bio            # count kernel stack traces for submit_bio
    ./stackcount -d ip_output          # include a user/kernel stack delimiter
    ./stackcount -s ip_output          # show symbol offsets
    ./stackcount -sv ip_output         # show offsets and raw addresses (verbose)
    ./stackcount 'tcp_send*'           # count stacks for funcs matching tcp_send*
    ./stackcount -r '^tcp_send.*'      # same as above, using regular expressions
    ./stackcount -Ti 5 ip_output       # output every 5 seconds, with timestamps
    ./stackcount -p 185 ip_output      # count ip_output stacks for PID 185 only
    ./stackcount -c 1 put_prev_entity  # count put_prev_entity stacks for CPU 1 only
    ./stackcount -p 185 c:malloc       # count stacks for malloc in PID 185
    ./stackcount t:sched:sched_fork    # count stacks for sched_fork tracepoint
    ./stackcount -p 185 u:node:*       # count stacks for all USDT probes in node
    ./stackcount -K t:sched:sched_switch   # kernel stacks only
    ./stackcount -U t:sched:sched_switch   # user stacks only
"""
        parser = argparse.ArgumentParser(
            description="Count events and their stack traces",
            formatter_class=argparse.RawDescriptionHelpFormatter,
            epilog=examples)
        parser.add_argument("-p", "--pid", type=int,
            help="trace this PID only")
        parser.add_argument("-c", "--cpu", type=int,
            help="trace this CPU only")
        parser.add_argument("-i", "--interval",
            help="summary interval, seconds")
        parser.add_argument("-D", "--duration",
            help="total duration of trace, seconds")
        parser.add_argument("-T", "--timestamp", action="store_true",
            help="include timestamp on output")
        parser.add_argument("-r", "--regexp", action="store_true",
            help="use regular expressions. Default is \"*\" wildcards only.")
        parser.add_argument("-s", "--offset", action="store_true",
            help="show address offsets")
        parser.add_argument("-P", "--perpid", action="store_true",
            help="display stacks separately for each process")
        parser.add_argument("-K", "--kernel-stacks-only",
            action="store_true", help="kernel stack only", default=False)
        parser.add_argument("-U", "--user-stacks-only",
            action="store_true", help="user stack only", default=False)
        parser.add_argument("-v", "--verbose", action="store_true",
            help="show raw addresses")
        parser.add_argument("-d", "--delimited", action="store_true",
            help="insert delimiter between kernel/user stacks")
        parser.add_argument("-f", "--folded", action="store_true",
            help="output folded format")
        parser.add_argument("--debug", action="store_true",
            help="print BPF program before starting (for debugging purposes)")
        parser.add_argument("pattern",
            help="search expression for events")
        self.args = parser.parse_args()
        global debug
        debug = self.args.debug

        # With only a duration given, print a single summary at the end.
        if self.args.duration and not self.args.interval:
            self.args.interval = self.args.duration
        if not self.args.interval:
            self.args.interval = 99999999

        if self.args.kernel_stacks_only and self.args.user_stacks_only:
            print("ERROR: -K and -U are mutually exclusive. If you want " +
                "both stacks, that is the default.")
            # Invalid usage: report failure to the caller.
            sys.exit(1)
        if not self.args.kernel_stacks_only and not self.args.user_stacks_only:
            self.kernel_stack = True
            self.user_stack = True
        else:
            self.kernel_stack = self.args.kernel_stacks_only
            self.user_stack = self.args.user_stacks_only

        # For tracing single processes in isolation, explicitly set perpid
        # to True, if not already set. This is required to generate the correct
        # BPF program that can store pid in the tgid field of the key_t object.
        if self.args.pid is not None and self.args.pid > 0:
            self.args.perpid = True

        self.probe = Probe(self.args.pattern,
                           self.kernel_stack, self.user_stack,
                           self.args.regexp, self.args.pid, self.args.perpid,
                           self.args.cpu)
        self.need_delimiter = self.args.delimited and not (
            self.args.kernel_stacks_only or self.args.user_stacks_only)

    def _print_kframe(self, addr):
        """Print one kernel stack frame, honoring -v and -s."""
        print(" ", end="")
        if self.args.verbose:
            print("%-16x " % addr, end="")
        if self.args.offset:
            sym = self.probe.bpf.ksym(addr, show_offset=True)
        else:
            sym = self.probe.bpf.ksym(addr)
        # 'replace' keeps odd symbol bytes from aborting the whole report.
        print("%s" % sym.decode('utf-8', 'replace'))

    def _print_uframe(self, addr, pid):
        """Print one user stack frame resolved against `pid`."""
        print(" ", end="")
        if self.args.verbose:
            print("%-16x " % addr, end="")
        if self.args.offset:
            sym = self.probe.bpf.sym(addr, pid, show_offset=True)
        else:
            sym = self.probe.bpf.sym(addr, pid)
        # 'replace' keeps odd symbol bytes from aborting the whole report.
        print("%s" % sym.decode('utf-8', 'replace'))

    @staticmethod
    def _signal_ignore(signum, frame):
        """SIGINT handler used while shutting down: just print a newline."""
        print()

    def _print_comm(self, comm, pid):
        """Print the process name and PID that a stack belongs to."""
        # The name comes out of the BPF map as bytes; decode it so Python 3
        # does not print a b'...' literal.
        if isinstance(comm, bytes):
            comm = comm.decode('utf-8', 'replace')
        print(" %s [%d]" % (comm, pid))

    def run(self):
        """Load and attach the probe, then print counts every interval.

        Loops until Ctrl-C or until the requested duration has elapsed, then
        exits the process.
        """
        # Parse the timing arguments once, before anything is attached, so a
        # malformed value fails fast.
        interval = int(self.args.interval)
        duration = int(self.args.duration) if self.args.duration else None

        self.probe.load()
        self.probe.attach()
        if not self.args.folded:
            print("Tracing %d functions for \"%s\"... Hit Ctrl-C to end." %
                  (self.probe.matched, self.args.pattern))
        bpf = self.probe.bpf
        exiting = False
        seconds = 0
        while True:
            try:
                sleep(interval)
                seconds += interval
            except KeyboardInterrupt:
                exiting = True
                # as cleanup can take many seconds, trap Ctrl-C:
                signal.signal(signal.SIGINT, Tool._signal_ignore)
            if duration is not None and seconds >= duration:
                exiting = True

            if not self.args.folded:
                print()
            if self.args.timestamp:
                print("%-8s\n" % strftime("%H:%M:%S"), end="")

            counts = bpf["counts"]
            stack_traces = bpf["stack_traces"]
            self.comm_cache = {}
            # Least frequent first, so the hottest stacks end up last.
            for k, v in sorted(counts.items(),
                               key=lambda item: item[1].value):
                # A negative stack id means that stack was not collected.
                user_stack = [] if k.user_stack_id < 0 else \
                    stack_traces.walk(k.user_stack_id)
                kernel_stack = [] if k.kernel_stack_id < 0 else \
                    stack_traces.walk(k.kernel_stack_id)

                if self.args.folded:
                    # print folded stack output
                    user_stack = list(user_stack)
                    kernel_stack = list(kernel_stack)
                    line = [k.name.decode('utf-8', 'replace')]
                    line += [bpf.sym(addr, k.tgid).decode('utf-8', 'replace')
                             for addr in reversed(user_stack)]
                    line += ["-"] if self.need_delimiter else []
                    line += [bpf.ksym(addr).decode('utf-8', 'replace')
                             for addr in reversed(kernel_stack)]
                    print("%s %d" % (";".join(line), v.value))
                else:
                    # print multi-line stack output
                    for addr in kernel_stack:
                        self._print_kframe(addr)
                    if self.need_delimiter:
                        print(" --")
                    for addr in user_stack:
                        self._print_uframe(addr, k.tgid)
                    # 0xffffffff is the tgid stored when not splitting by
                    # process; there is no meaningful comm to show then.
                    if not self.args.pid and k.tgid != 0xffffffff:
                        self._print_comm(k.name, k.tgid)
                    print(" %d\n" % v.value)
            counts.clear()

            if exiting:
                if not self.args.folded:
                    print("Detaching...")
                sys.exit()


if __name__ == "__main__":
    try:
        Tool().run()
    except Exception as e:
        # SystemExit is not an Exception subclass, so normal exits pass
        # straight through this handler.
        if debug:
            traceback.print_exc()
        else:
            print(e)