1#!/usr/bin/env python 2# @lint-avoid-python-3-compatibility-imports 3# 4# nfsdist Summarize NFS operation latency 5# for Linux, uses BCC and eBPF 6# 7# USAGE: nfsdist [-h] [-T] [-m] [-p PID] [interval] [count] 8# 9# 4-Sep-2017 Samuel Nair created this 10 11from __future__ import print_function 12from bcc import BPF 13from time import sleep, strftime 14import argparse 15 16# arguments 17examples = """examples: 18 ./nfsdist # show operation latency as a histogram 19 ./nfsdist -p 181 # trace PID 181 only 20 ./nfsdist 1 10 # print 1 second summaries, 10 times 21 ./nfsdist -m 5 # 5s summaries, milliseconds 22""" 23parser = argparse.ArgumentParser( 24 description="Summarize NFS operation latency", 25 formatter_class=argparse.RawDescriptionHelpFormatter, 26 epilog=examples) 27parser.add_argument("-T", "--notimestamp", action="store_true", 28 help="don't include timestamp on interval output") 29parser.add_argument("-m", "--milliseconds", action="store_true", 30 help="output in milliseconds") 31parser.add_argument("-p", "--pid", 32 help="trace this PID only") 33parser.add_argument("interval", nargs="?", 34 help="output interval, in seconds") 35parser.add_argument("count", nargs="?", default=99999999, 36 help="number of outputs") 37parser.add_argument("--ebpf", action="store_true", 38 help=argparse.SUPPRESS) 39args = parser.parse_args() 40pid = args.pid 41countdown = int(args.count) 42if args.milliseconds: 43 factor = 1000000 44 label = "msecs" 45else: 46 factor = 1000 47 label = "usecs" 48 if args.interval and int(args.interval) == 0: 49 print("ERROR: interval 0. Exiting.") 50 exit() 51debug = 0 52 53# define BPF program 54bpf_text = """ 55#include <uapi/linux/ptrace.h> 56#include <linux/fs.h> 57#include <linux/sched.h> 58 59#define OP_NAME_LEN 8 60typedef struct dist_key { 61 char op[OP_NAME_LEN]; 62 u64 slot; 63} dist_key_t; 64 65BPF_HASH(start, u32); 66BPF_HISTOGRAM(dist, dist_key_t); 67 68// time operation 69int trace_entry(struct pt_regs *ctx) 70{ 71 u64 pid_tgid = bpf_get_current_pid_tgid(); 72 u32 pid = pid_tgid >> 32; 73 u32 tid = (u32)pid_tgid; 74 75 if (FILTER_PID) 76 return 0; 77 u64 ts = bpf_ktime_get_ns(); 78 start.update(&tid, &ts); 79 return 0; 80} 81 82static int trace_return(struct pt_regs *ctx, const char *op) 83{ 84 u64 *tsp; 85 u64 pid_tgid = bpf_get_current_pid_tgid(); 86 u32 pid = pid_tgid >> 32; 87 u32 tid = (u32)pid_tgid; 88 89 // fetch timestamp and calculate delta 90 tsp = start.lookup(&tid); 91 if (tsp == 0) { 92 return 0; // missed start or filtered 93 } 94 u64 delta = (bpf_ktime_get_ns() - *tsp) / FACTOR; 95 96 // store as histogram 97 dist_key_t key = {.slot = bpf_log2l(delta)}; 98 __builtin_memcpy(&key.op, op, sizeof(key.op)); 99 dist.atomic_increment(key); 100 101 start.delete(&tid); 102 return 0; 103} 104 105int trace_read_return(struct pt_regs *ctx) 106{ 107 char *op = "read"; 108 return trace_return(ctx, op); 109} 110 111int trace_write_return(struct pt_regs *ctx) 112{ 113 char *op = "write"; 114 return trace_return(ctx, op); 115} 116 117int trace_open_return(struct pt_regs *ctx) 118{ 119 char *op = "open"; 120 return trace_return(ctx, op); 121} 122 123int trace_getattr_return(struct pt_regs *ctx) 124{ 125 char *op = "getattr"; 126 return trace_return(ctx, op); 127} 128""" 129bpf_text = bpf_text.replace('FACTOR', str(factor)) 130if args.pid: 131 bpf_text = bpf_text.replace('FILTER_PID', 'pid != %s' % pid) 132else: 133 bpf_text = bpf_text.replace('FILTER_PID', '0') 134if debug or args.ebpf: 135 print(bpf_text) 136 if args.ebpf: 137 exit() 138 139# load BPF program 140b = BPF(text=bpf_text) 141 142# common file functions 143b.attach_kprobe(event="nfs_file_read", fn_name="trace_entry") 144b.attach_kprobe(event="nfs_file_write", fn_name="trace_entry") 145b.attach_kprobe(event="nfs_file_open", fn_name="trace_entry") 146b.attach_kprobe(event="nfs_getattr", fn_name="trace_entry") 147 148b.attach_kretprobe(event="nfs_file_read", fn_name="trace_read_return") 149b.attach_kretprobe(event="nfs_file_write", fn_name="trace_write_return") 150b.attach_kretprobe(event="nfs_file_open", fn_name="trace_open_return") 151b.attach_kretprobe(event="nfs_getattr", fn_name="trace_getattr_return") 152 153if BPF.get_kprobe_functions(b'nfs4_file_open'): 154 b.attach_kprobe(event="nfs4_file_open", fn_name="trace_entry") 155 b.attach_kretprobe(event="nfs4_file_open", fn_name="trace_open_return") 156else: 157 b.attach_kprobe(event="nfs_file_open", fn_name="trace_entry") 158 b.attach_kretprobe(event="nfs_file_open", fn_name="trace_open_return") 159 160print("Tracing NFS operation latency... Hit Ctrl-C to end.") 161 162# output 163exiting = 0 164dist = b.get_table("dist") 165while (1): 166 try: 167 if args.interval: 168 sleep(int(args.interval)) 169 else: 170 sleep(99999999) 171 except KeyboardInterrupt: 172 exiting = 1 173 174 print() 175 if args.interval and (not args.notimestamp): 176 print(strftime("%H:%M:%S:")) 177 178 dist.print_log2_hist(label, "operation", section_print_fn=bytes.decode) 179 dist.clear() 180 181 countdown -= 1 182 if exiting or countdown == 0: 183 exit() 184