1#!/usr/bin/env python 2# @lint-avoid-python-3-compatibility-imports 3# 4# funclatency Time functions and print latency as a histogram. 5# For Linux, uses BCC, eBPF. 6# 7# USAGE: funclatency [-h] [-p PID] [-i INTERVAL] [-T] [-u] [-m] [-F] [-r] [-v] 8# pattern 9# 10# Run "funclatency -h" for full usage. 11# 12# The pattern is a string with optional '*' wildcards, similar to file 13# globbing. If you'd prefer to use regular expressions, use the -r option. 14# 15# Without the '-l' option, only the innermost calls will be recorded. 16# Use '-l LEVEL' to record the outermost n levels of nested/recursive functions. 17# 18# Copyright (c) 2015 Brendan Gregg. 19# Copyright (c) 2021 Chenyue Zhou. 20# Licensed under the Apache License, Version 2.0 (the "License") 21# 22# 20-Sep-2015 Brendan Gregg Created this. 23# 06-Oct-2016 Sasha Goldshtein Added user function support. 24# 14-Apr-2021 Chenyue Zhou Added nested or recursive function support. 25 26from __future__ import print_function 27from bcc import BPF 28from time import sleep, strftime 29import argparse 30import signal 31 32# arguments 33examples = """examples: 34 ./funclatency do_sys_open # time the do_sys_open() kernel function 35 ./funclatency c:read # time the read() C library function 36 ./funclatency -u vfs_read # time vfs_read(), in microseconds 37 ./funclatency -m do_nanosleep # time do_nanosleep(), in milliseconds 38 ./funclatency -i 2 -d 10 c:open # output every 2 seconds, for duration 10s 39 ./funclatency -mTi 5 vfs_read # output every 5 seconds, with timestamps 40 ./funclatency -p 181 vfs_read # time process 181 only 41 ./funclatency 'vfs_fstat*' # time both vfs_fstat() and vfs_fstatat() 42 ./funclatency 'c:*printf' # time the *printf family of functions 43 ./funclatency -F 'vfs_r*' # show one histogram per matched function 44""" 45parser = argparse.ArgumentParser( 46 description="Time functions and print latency as a histogram", 47 formatter_class=argparse.RawDescriptionHelpFormatter, 48 epilog=examples) 49parser.add_argument("-p", "--pid", type=int, 50 help="trace this PID only") 51parser.add_argument("-i", "--interval", type=int, 52 help="summary interval, in seconds") 53parser.add_argument("-d", "--duration", type=int, 54 help="total duration of trace, in seconds") 55parser.add_argument("-T", "--timestamp", action="store_true", 56 help="include timestamp on output") 57parser.add_argument("-u", "--microseconds", action="store_true", 58 help="microsecond histogram") 59parser.add_argument("-m", "--milliseconds", action="store_true", 60 help="millisecond histogram") 61parser.add_argument("-F", "--function", action="store_true", 62 help="show a separate histogram per function") 63parser.add_argument("-r", "--regexp", action="store_true", 64 help="use regular expressions. Default is \"*\" wildcards only.") 65parser.add_argument("-l", "--level", type=int, 66 help="set the level of nested or recursive functions") 67parser.add_argument("-v", "--verbose", action="store_true", 68 help="print the BPF program (for debugging purposes)") 69parser.add_argument("pattern", 70 help="search expression for functions") 71parser.add_argument("--ebpf", action="store_true", 72 help=argparse.SUPPRESS) 73args = parser.parse_args() 74if args.duration and not args.interval: 75 args.interval = args.duration 76if not args.interval: 77 args.interval = 99999999 78 79def bail(error): 80 print("Error: " + error) 81 exit(1) 82 83parts = args.pattern.split(':') 84if len(parts) == 1: 85 library = None 86 pattern = args.pattern 87elif len(parts) == 2: 88 library = parts[0] 89 libpath = BPF.find_library(library) or BPF.find_exe(library) 90 if not libpath: 91 bail("can't resolve library %s" % library) 92 library = libpath 93 pattern = parts[1] 94else: 95 bail("unrecognized pattern format '%s'" % args.pattern) 96 97if not args.regexp: 98 pattern = pattern.replace('*', '.*') 99 pattern = '^' + pattern + '$' 100 101# define BPF program 102bpf_text = """ 103#include <uapi/linux/ptrace.h> 104 105typedef struct ip_pid { 106 u64 ip; 107 u64 pid; 108} ip_pid_t; 109 110typedef struct hist_key { 111 ip_pid_t key; 112 u64 slot; 113} hist_key_t; 114 115TYPEDEF 116 117BPF_ARRAY(avg, u64, 2); 118STORAGE 119FUNCTION 120 121int trace_func_entry(struct pt_regs *ctx) 122{ 123 u64 pid_tgid = bpf_get_current_pid_tgid(); 124 u32 pid = pid_tgid; 125 u32 tgid = pid_tgid >> 32; 126 u64 ts = bpf_ktime_get_ns(); 127 128 FILTER 129 ENTRYSTORE 130 131 return 0; 132} 133 134int trace_func_return(struct pt_regs *ctx) 135{ 136 u64 *tsp, delta; 137 u64 pid_tgid = bpf_get_current_pid_tgid(); 138 u32 pid = pid_tgid; 139 u32 tgid = pid_tgid >> 32; 140 141 // calculate delta time 142 CALCULATE 143 144 u32 lat = 0; 145 u32 cnt = 1; 146 avg.atomic_increment(lat, delta); 147 avg.atomic_increment(cnt); 148 149 FACTOR 150 151 // store as histogram 152 STORE 153 154 return 0; 155} 156""" 157 158# do we need to store the IP and pid for each invocation? 159need_key = args.function or (library and not args.pid) 160 161# code substitutions 162if args.pid: 163 bpf_text = bpf_text.replace('FILTER', 164 'if (tgid != %d) { return 0; }' % args.pid) 165else: 166 bpf_text = bpf_text.replace('FILTER', '') 167if args.milliseconds: 168 bpf_text = bpf_text.replace('FACTOR', 'delta /= 1000000;') 169 label = "msecs" 170elif args.microseconds: 171 bpf_text = bpf_text.replace('FACTOR', 'delta /= 1000;') 172 label = "usecs" 173else: 174 bpf_text = bpf_text.replace('FACTOR', '') 175 label = "nsecs" 176if need_key: 177 pid = '-1' if not library else 'tgid' 178 179 if args.level and args.level > 1: 180 bpf_text = bpf_text.replace('TYPEDEF', 181 """ 182#define STACK_DEPTH %s 183 184typedef struct { 185 u64 ip; 186 u64 start_ts; 187} func_cache_t; 188 189/* LIFO */ 190typedef struct { 191 u32 head; 192 func_cache_t cache[STACK_DEPTH]; 193} func_stack_t; 194 """ % args.level) 195 196 bpf_text = bpf_text.replace('STORAGE', 197 """ 198BPF_HASH(func_stack, u32, func_stack_t); 199BPF_HISTOGRAM(dist, hist_key_t); 200 """) 201 202 bpf_text = bpf_text.replace('FUNCTION', 203 """ 204static inline int stack_pop(func_stack_t *stack, func_cache_t *cache) { 205 if (stack->head <= 0) { 206 return -1; 207 } 208 209 u32 index = --stack->head; 210 if (index < STACK_DEPTH) { 211 /* bound check */ 212 cache->ip = stack->cache[index].ip; 213 cache->start_ts = stack->cache[index].start_ts; 214 } 215 216 return 0; 217} 218 219static inline int stack_push(func_stack_t *stack, func_cache_t *cache) { 220 u32 index = stack->head; 221 222 if (index > STACK_DEPTH - 1) { 223 /* bound check */ 224 return -1; 225 } 226 227 stack->head++; 228 stack->cache[index].ip = cache->ip; 229 stack->cache[index].start_ts = cache->start_ts; 230 231 return 0; 232} 233 """) 234 235 bpf_text = bpf_text.replace('ENTRYSTORE', 236 """ 237 u64 ip = PT_REGS_IP(ctx); 238 func_cache_t cache = { 239 .ip = ip, 240 .start_ts = ts, 241 }; 242 243 func_stack_t *stack = func_stack.lookup(&pid); 244 if (!stack) { 245 func_stack_t new_stack = { 246 .head = 0, 247 }; 248 249 if (!stack_push(&new_stack, &cache)) { 250 func_stack.update(&pid, &new_stack); 251 } 252 253 return 0; 254 } 255 256 if (!stack_push(stack, &cache)) { 257 func_stack.update(&pid, stack); 258 } 259 """) 260 261 bpf_text = bpf_text.replace('CALCULATE', 262 """ 263 u64 ip, start_ts; 264 func_stack_t *stack = func_stack.lookup(&pid); 265 if (!stack) { 266 /* miss start */ 267 return 0; 268 } 269 270 func_cache_t cache = {}; 271 if (stack_pop(stack, &cache)) { 272 func_stack.delete(&pid); 273 274 return 0; 275 } 276 ip = cache.ip; 277 start_ts = cache.start_ts; 278 delta = bpf_ktime_get_ns() - start_ts; 279 """) 280 281 bpf_text = bpf_text.replace('STORE', 282 """ 283 hist_key_t key; 284 key.key.ip = ip; 285 key.key.pid = %s; 286 key.slot = bpf_log2l(delta); 287 dist.atomic_increment(key); 288 289 if (stack->head == 0) { 290 /* empty */ 291 func_stack.delete(&pid); 292 } 293 """ % pid) 294 295 else: 296 bpf_text = bpf_text.replace('STORAGE', 'BPF_HASH(ipaddr, u32);\n'\ 297 'BPF_HISTOGRAM(dist, hist_key_t);\n'\ 298 'BPF_HASH(start, u32);') 299 # stash the IP on entry, as on return it's kretprobe_trampoline: 300 bpf_text = bpf_text.replace('ENTRYSTORE', 301 'u64 ip = PT_REGS_IP(ctx); ipaddr.update(&pid, &ip);'\ 302 ' start.update(&pid, &ts);') 303 bpf_text = bpf_text.replace('STORE', 304 """ 305 u64 ip, *ipp = ipaddr.lookup(&pid); 306 if (ipp) { 307 ip = *ipp; 308 hist_key_t key; 309 key.key.ip = ip; 310 key.key.pid = %s; 311 key.slot = bpf_log2l(delta); 312 dist.atomic_increment(key); 313 ipaddr.delete(&pid); 314 } 315 """ % pid) 316else: 317 bpf_text = bpf_text.replace('STORAGE', 'BPF_HISTOGRAM(dist);\n'\ 318 'BPF_HASH(start, u32);') 319 bpf_text = bpf_text.replace('ENTRYSTORE', 'start.update(&pid, &ts);') 320 bpf_text = bpf_text.replace('STORE', 321 'dist.atomic_increment(bpf_log2l(delta));') 322 323bpf_text = bpf_text.replace('TYPEDEF', '') 324bpf_text = bpf_text.replace('FUNCTION', '') 325bpf_text = bpf_text.replace('CALCULATE', 326 """ 327 tsp = start.lookup(&pid); 328 if (tsp == 0) { 329 return 0; // missed start 330 } 331 delta = bpf_ktime_get_ns() - *tsp; 332 start.delete(&pid); 333 """) 334 335if args.verbose or args.ebpf: 336 print(bpf_text) 337 if args.ebpf: 338 exit() 339 340# signal handler 341def signal_ignore(signal, frame): 342 print() 343 344# load BPF program 345b = BPF(text=bpf_text) 346 347# attach probes 348if not library: 349 b.attach_kprobe(event_re=pattern, fn_name="trace_func_entry") 350 b.attach_kretprobe(event_re=pattern, fn_name="trace_func_return") 351 matched = b.num_open_kprobes() 352else: 353 b.attach_uprobe(name=library, sym_re=pattern, fn_name="trace_func_entry", 354 pid=args.pid or -1) 355 b.attach_uretprobe(name=library, sym_re=pattern, 356 fn_name="trace_func_return", pid=args.pid or -1) 357 matched = b.num_open_uprobes() 358 359if matched == 0: 360 print("0 functions matched by \"%s\". Exiting." % args.pattern) 361 exit() 362 363# header 364print("Tracing %d functions for \"%s\"... Hit Ctrl-C to end." % 365 (matched / 2, args.pattern)) 366 367# output 368def print_section(key): 369 if not library: 370 return BPF.sym(key[0], -1).decode('utf-8', 'replace') 371 else: 372 return "%s [%d]" % (BPF.sym(key[0], key[1]).decode('utf-8', 'replace'), key[1]) 373 374exiting = 0 if args.interval else 1 375seconds = 0 376dist = b.get_table("dist") 377while (1): 378 try: 379 sleep(args.interval) 380 seconds += args.interval 381 except KeyboardInterrupt: 382 exiting = 1 383 # as cleanup can take many seconds, trap Ctrl-C: 384 signal.signal(signal.SIGINT, signal_ignore) 385 if args.duration and seconds >= args.duration: 386 exiting = 1 387 388 print() 389 if args.timestamp: 390 print("%-8s\n" % strftime("%H:%M:%S"), end="") 391 392 if need_key: 393 dist.print_log2_hist(label, "Function", section_print_fn=print_section, 394 bucket_fn=lambda k: (k.ip, k.pid)) 395 else: 396 dist.print_log2_hist(label) 397 398 total = b['avg'][0].value 399 counts = b['avg'][1].value 400 if counts > 0: 401 if label == 'msecs': 402 total /= 1000000 403 elif label == 'usecs': 404 total /= 1000 405 avg = total/counts 406 print("\navg = %ld %s, total: %ld %s, count: %ld\n" %(total/counts, label, total, label, counts)) 407 408 dist.clear() 409 b['avg'].clear() 410 411 if exiting: 412 print("Detaching...") 413 exit() 414