1#!/usr/bin/env python 2# @lint-avoid-python-3-compatibility-imports 3# 4# btrfsdist Summarize btrfs operation latency. 5# For Linux, uses BCC, eBPF. 6# 7# USAGE: btrfsdist [-h] [-T] [-m] [-p PID] [interval] [count] 8# 9# Copyright 2016 Netflix, Inc. 10# Licensed under the Apache License, Version 2.0 (the "License") 11# 12# 15-Feb-2016 Brendan Gregg Created this. 13 14from __future__ import print_function 15from bcc import BPF 16from time import sleep, strftime 17import argparse 18 19# symbols 20kallsyms = "/proc/kallsyms" 21 22# arguments 23examples = """examples: 24 ./btrfsdist # show operation latency as a histogram 25 ./btrfsdist -p 181 # trace PID 181 only 26 ./btrfsdist 1 10 # print 1 second summaries, 10 times 27 ./btrfsdist -m 5 # 5s summaries, milliseconds 28""" 29parser = argparse.ArgumentParser( 30 description="Summarize btrfs operation latency", 31 formatter_class=argparse.RawDescriptionHelpFormatter, 32 epilog=examples) 33parser.add_argument("-T", "--notimestamp", action="store_true", 34 help="don't include timestamp on interval output") 35parser.add_argument("-m", "--milliseconds", action="store_true", 36 help="output in milliseconds") 37parser.add_argument("-p", "--pid", 38 help="trace this PID only") 39parser.add_argument("interval", nargs="?", 40 help="output interval, in seconds") 41parser.add_argument("count", nargs="?", default=99999999, 42 help="number of outputs") 43parser.add_argument("--ebpf", action="store_true", 44 help=argparse.SUPPRESS) 45args = parser.parse_args() 46pid = args.pid 47countdown = int(args.count) 48if args.milliseconds: 49 factor = 1000000 50 label = "msecs" 51else: 52 factor = 1000 53 label = "usecs" 54if args.interval and int(args.interval) == 0: 55 print("ERROR: interval 0. Exiting.") 56 exit() 57debug = 0 58 59# define BPF program 60bpf_text = """ 61#include <uapi/linux/ptrace.h> 62#include <linux/fs.h> 63#include <linux/sched.h> 64 65#define OP_NAME_LEN 8 66typedef struct dist_key { 67 char op[OP_NAME_LEN]; 68 u64 slot; 69} dist_key_t; 70BPF_HASH(start, u32); 71BPF_HISTOGRAM(dist, dist_key_t); 72 73// time operation 74int trace_entry(struct pt_regs *ctx) 75{ 76 u64 pid_tgid = bpf_get_current_pid_tgid(); 77 u32 pid = pid_tgid >> 32; 78 u32 tid = (u32)pid_tgid; 79 80 if (FILTER_PID) 81 return 0; 82 u64 ts = bpf_ktime_get_ns(); 83 start.update(&tid, &ts); 84 return 0; 85} 86 87// The current btrfs (Linux 4.5) uses generic_file_read_iter() instead of it's 88// own read function. So we need to trace that and then filter on btrfs, which 89// I do by checking file->f_op. 90int trace_read_entry(struct pt_regs *ctx, struct kiocb *iocb) 91{ 92 u64 pid_tgid = bpf_get_current_pid_tgid(); 93 u32 pid = pid_tgid >> 32; 94 u32 tid = (u32)pid_tgid; 95 96 if (FILTER_PID) 97 return 0; 98 99 // btrfs filter on file->f_op == btrfs_file_operations 100 struct file *fp = iocb->ki_filp; 101 if ((u64)fp->f_op != BTRFS_FILE_OPERATIONS) 102 return 0; 103 104 u64 ts = bpf_ktime_get_ns(); 105 start.update(&tid, &ts); 106 return 0; 107} 108 109// The current btrfs (Linux 4.5) uses generic_file_open(), instead of it's own 110// function. Same as with reads. Trace the generic path and filter: 111int trace_open_entry(struct pt_regs *ctx, struct inode *inode, 112 struct file *file) 113{ 114 u64 pid_tgid = bpf_get_current_pid_tgid(); 115 u32 pid = pid_tgid >> 32; 116 u32 tid = (u32)pid_tgid; 117 118 if (FILTER_PID) 119 return 0; 120 121 // btrfs filter on file->f_op == btrfs_file_operations 122 if ((u64)file->f_op != BTRFS_FILE_OPERATIONS) 123 return 0; 124 125 u64 ts = bpf_ktime_get_ns(); 126 start.update(&tid, &ts); 127 return 0; 128} 129 130static int trace_return(struct pt_regs *ctx, const char *op) 131{ 132 u64 *tsp; 133 u64 pid_tgid = bpf_get_current_pid_tgid(); 134 u32 pid = pid_tgid >> 32; 135 u32 tid = (u32)pid_tgid; 136 137 // fetch timestamp and calculate delta 138 tsp = start.lookup(&tid); 139 if (tsp == 0) { 140 return 0; // missed start or filtered 141 } 142 u64 delta = (bpf_ktime_get_ns() - *tsp) / FACTOR; 143 144 // store as histogram 145 dist_key_t key = {.slot = bpf_log2l(delta)}; 146 __builtin_memcpy(&key.op, op, sizeof(key.op)); 147 dist.atomic_increment(key); 148 149 start.delete(&tid); 150 return 0; 151} 152 153int trace_read_return(struct pt_regs *ctx) 154{ 155 char *op = "read"; 156 return trace_return(ctx, op); 157} 158 159int trace_write_return(struct pt_regs *ctx) 160{ 161 char *op = "write"; 162 return trace_return(ctx, op); 163} 164 165int trace_open_return(struct pt_regs *ctx) 166{ 167 char *op = "open"; 168 return trace_return(ctx, op); 169} 170 171int trace_fsync_return(struct pt_regs *ctx) 172{ 173 char *op = "fsync"; 174 return trace_return(ctx, op); 175} 176""" 177 178# code replacements 179with open(kallsyms) as syms: 180 ops = '' 181 for line in syms: 182 a = line.rstrip().split() 183 (addr, name) = (a[0], a[2]) 184 name = name.split("\t")[0] 185 if name == "btrfs_file_operations": 186 ops = "0x" + addr 187 break 188 if ops == '': 189 print("ERROR: no btrfs_file_operations in /proc/kallsyms. Exiting.") 190 print("HINT: the kernel should be built with CONFIG_KALLSYMS_ALL.") 191 exit() 192 bpf_text = bpf_text.replace('BTRFS_FILE_OPERATIONS', ops) 193bpf_text = bpf_text.replace('FACTOR', str(factor)) 194if args.pid: 195 bpf_text = bpf_text.replace('FILTER_PID', 'pid != %s' % pid) 196else: 197 bpf_text = bpf_text.replace('FILTER_PID', '0') 198if debug or args.ebpf: 199 print(bpf_text) 200 if args.ebpf: 201 exit() 202 203# load BPF program 204b = BPF(text=bpf_text) 205 206# Common file functions. See earlier comment about generic_file_read_iter(). 207b.attach_kprobe(event="generic_file_read_iter", fn_name="trace_read_entry") 208b.attach_kprobe(event="btrfs_file_write_iter", fn_name="trace_entry") 209b.attach_kprobe(event="generic_file_open", fn_name="trace_open_entry") 210b.attach_kprobe(event="btrfs_sync_file", fn_name="trace_entry") 211b.attach_kretprobe(event="generic_file_read_iter", fn_name="trace_read_return") 212b.attach_kretprobe(event="btrfs_file_write_iter", fn_name="trace_write_return") 213b.attach_kretprobe(event="generic_file_open", fn_name="trace_open_return") 214b.attach_kretprobe(event="btrfs_sync_file", fn_name="trace_fsync_return") 215 216print("Tracing btrfs operation latency... Hit Ctrl-C to end.") 217 218# output 219exiting = 0 220dist = b.get_table("dist") 221while (1): 222 try: 223 if args.interval: 224 sleep(int(args.interval)) 225 else: 226 sleep(99999999) 227 except KeyboardInterrupt: 228 exiting = 1 229 230 print() 231 if args.interval and (not args.notimestamp): 232 print(strftime("%H:%M:%S:")) 233 234 dist.print_log2_hist(label, "operation", section_print_fn=bytes.decode) 235 dist.clear() 236 237 countdown -= 1 238 if exiting or countdown == 0: 239 exit() 240