#!/usr/bin/python
#
# wakeuptime    Summarize sleep to wakeup time by waker kernel stack
#               For Linux, uses BCC, eBPF.
#
# USAGE: wakeuptime [-h] [-u] [-p PID] [-v] [-f] [duration]
#
# The current implementation uses an unrolled loop for x86_64, and was written
# as a proof of concept. This implementation should be replaced in the future
# with an appropriate bpf_ call, when available.
#
# Currently limited to a stack trace depth of 21 (maxdepth + 1).
#
# Copyright 2016 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 14-Jan-2016	Brendan Gregg	Created this.

from __future__ import print_function
from bcc import BPF
from time import sleep, strftime
import argparse
import signal

# arguments
examples = """examples:
    ./wakeuptime             # trace blocked time with waker stacks
    ./wakeuptime 5           # trace for 5 seconds only
    ./wakeuptime -f 5        # 5 seconds, and output in folded format
    ./wakeuptime -u          # don't include kernel threads (user only)
    ./wakeuptime -p 185      # trace for PID 185 only
"""
parser = argparse.ArgumentParser(
    description="Summarize sleep to wakeup time by waker kernel stack",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-u", "--useronly", action="store_true",
    help="user threads only (no kernel threads)")
# The PID is substituted into the C program text below, so require an integer
# here rather than letting arbitrary text reach the BPF compiler.
parser.add_argument("-p", "--pid", type=int,
    help="trace this PID only")
# NOTE: --verbose is accepted, but args.verbose is not read anywhere in this
# file.
parser.add_argument("-v", "--verbose", action="store_true",
    help="show raw addresses")
parser.add_argument("-f", "--folded", action="store_true",
    help="output folded format")
# 99999999 doubles as the "run until Ctrl-C" sentinel tested by the header
# code further down.
parser.add_argument("duration", nargs="?", type=int, default=99999999,
    help="duration of trace, in seconds")
args = parser.parse_args()
folded = args.folded
duration = args.duration
debug = 0
maxdepth = 20    # and MAXDEPTH
if args.pid is not None and args.useronly:
    print("ERROR: use either -p or -u.")
    # usage error: report failure to the caller instead of exiting 0
    exit(1)

# signal handler
def signal_ignore(signal, frame):
    """SIGINT handler installed while results are printed.

    It only prints a newline, so a second Ctrl-C during the (potentially
    slow) output phase does not raise KeyboardInterrupt. The parameter names
    follow the (signum, frame) handler calling convention; the first one
    shadows the signal module inside this function only.
    """
    print()

# define BPF program
#
# offcpu() stores a timestamp per thread ID in the "start" map. waker()
# looks that timestamp up for the thread being woken, computes the elapsed
# time in microseconds, walks up to MAXDEPTH frame pointers of the waker's
# kernel stack, and adds the elapsed time to "counts" under the key
# (waker comm, target comm, return addresses). The FILTER token is replaced
# with a C expression before compilation (see below).
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>

#define MAXDEPTH    20
#define MINBLOCK_US 1

struct key_t {
    char waker[TASK_COMM_LEN];
    char target[TASK_COMM_LEN];
    // Skip saving the ip
    u64 ret[MAXDEPTH];
};
BPF_HASH(counts, struct key_t);
BPF_HASH(start, u32);

static u64 get_frame(u64 *bp) {
    if (*bp) {
        // The following stack walker is x86_64/arm64 specific
        u64 ret = 0;
        if (bpf_probe_read(&ret, sizeof(ret), (void *)(*bp+8)))
            return 0;
        if (bpf_probe_read(bp, sizeof(*bp), (void *)*bp))
            return 0;
#ifdef __x86_64__
        if (ret < __START_KERNEL_map)
#elif __aarch64__
        if (ret < VA_START)
#else
#error "Unsupported architecture for stack walker"
#endif
            return 0;
        return ret;
    }
    return 0;
}

int offcpu(struct pt_regs *ctx) {
    u32 pid = bpf_get_current_pid_tgid();
    u64 ts = bpf_ktime_get_ns();
    // XXX: should filter here too, but need task_struct
    start.update(&pid, &ts);
    return 0;
}

int waker(struct pt_regs *ctx, struct task_struct *p) {
    u32 pid = p->pid;
    u64 delta, *tsp, ts;

    tsp = start.lookup(&pid);
    if (tsp == 0)
        return 0;        // missed start
    start.delete(&pid);

    if (FILTER)
        return 0;

    // calculate delta time
    delta = bpf_ktime_get_ns() - *tsp;
    delta = delta / 1000;
    if (delta < MINBLOCK_US)
        return 0;

    struct key_t key = {};
    u64 zero = 0, *val, bp = 0;
    int depth = 0;

    bpf_probe_read(&key.target, sizeof(key.target), p->comm);
    bpf_get_current_comm(&key.waker, sizeof(key.waker));
    bp = PT_REGS_FP(ctx);

    // unrolled loop (MAXDEPTH):
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;

    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;

out:
    val = counts.lookup_or_init(&key, &zero);
    if (val) {
        (*val) += delta;
    }
    return 0;
}
"""
# Build the C expression that replaces the FILTER token in waker(); when it
# evaluates true the wakeup is skipped. "pid" there is the woken thread's
# p->pid.
if args.pid is not None:
    thread_filter = 'pid != %d' % args.pid
elif args.useronly:
    thread_filter = 'p->flags & PF_KTHREAD'
else:
    thread_filter = '0'
bpf_text = bpf_text.replace('FILTER', thread_filter)
if debug:
    print(bpf_text)

# initialize BPF
b = BPF(text=bpf_text)
b.attach_kprobe(event="schedule", fn_name="offcpu")
b.attach_kprobe(event="try_to_wake_up", fn_name="waker")
matched = b.num_open_kprobes()
if matched == 0:
    print("0 functions traced. Exiting.")
    # nothing was attached, so there is nothing to report: exit as a failure
    exit(1)

# header
if not folded:
    print("Tracing blocked time (us) by kernel stack", end="")
    if duration < 99999999:
        print(" for %d secs." % duration)
    else:
        print("... Hit Ctrl-C to end.")

# output
# The loop body ends in exit(), so it runs exactly once: wait for the
# duration (or Ctrl-C), print the summary, and leave.
while True:
    try:
        sleep(duration)
    except KeyboardInterrupt:
        # as cleanup can take many seconds, trap Ctrl-C:
        signal.signal(signal.SIGINT, signal_ignore)

    if not folded:
        print()
    counts = b.get_table("counts")
    # ascending by accumulated blocked time, so the largest totals print last
    for k, v in sorted(counts.items(), key=lambda kv: kv[1].value):
        waker_name = k.waker.decode('utf-8', 'replace')
        target_name = k.target.decode('utf-8', 'replace')
        if folded:
            # print folded stack output
            line = waker_name + ";"
            # ret[] is walked from the highest index down to ret[0]; unused
            # (zero) slots are skipped
            for i in reversed(range(0, maxdepth)):
                if k.ret[i] == 0:
                    continue
                line = line + b.ksym(k.ret[i])
                if i != 0:
                    line = line + ";"
            print("%s;%s %d" % (line, target_name, v.value))
        else:
            # print default multi-line stack output
            print(" %-16s %s" % ("target:", target_name))
            for i in range(0, maxdepth):
                if k.ret[i] == 0:
                    break
                print(" %-16x %s" % (k.ret[i],
                    b.ksym(k.ret[i])))
            print(" %-16s %s" % ("waker:", waker_name))
            print(" %d\n" % v.value)
    counts.clear()

    if not folded:
        print("Detaching...")
    exit()