1#!/usr/bin/env python 2# 3# wakeuptime Summarize sleep to wakeup time by waker kernel stack 4# For Linux, uses BCC, eBPF. 5# 6# USAGE: wakeuptime [-h] [-u] [-p PID] [-v] [-f] [duration] 7# 8# Copyright 2016 Netflix, Inc. 9# Licensed under the Apache License, Version 2.0 (the "License") 10# 11# 14-Jan-2016 Brendan Gregg Created this. 12# 03-Apr-2023 Rocky Xing Modified the order of stack output. 13# 04-Apr-2023 Rocky Xing Updated default stack storage size. 14 15from __future__ import print_function 16from bcc import BPF 17from bcc.utils import printb 18from time import sleep 19import argparse 20import signal 21import errno 22from sys import stderr 23 24# arg validation 25def positive_int(val): 26 try: 27 ival = int(val) 28 except ValueError: 29 raise argparse.ArgumentTypeError("must be an integer") 30 31 if ival < 0: 32 raise argparse.ArgumentTypeError("must be positive") 33 return ival 34 35def positive_nonzero_int(val): 36 ival = positive_int(val) 37 if ival == 0: 38 raise argparse.ArgumentTypeError("must be nonzero") 39 return ival 40 41# arguments 42examples = """examples: 43 ./wakeuptime # trace blocked time with waker stacks 44 ./wakeuptime 5 # trace for 5 seconds only 45 ./wakeuptime -f 5 # 5 seconds, and output in folded format 46 ./wakeuptime -u # don't include kernel threads (user only) 47 ./wakeuptime -p 185 # trace for PID 185 only 48""" 49parser = argparse.ArgumentParser( 50 description="Summarize sleep to wakeup time by waker kernel stack", 51 formatter_class=argparse.RawDescriptionHelpFormatter, 52 epilog=examples) 53parser.add_argument("-u", "--useronly", action="store_true", 54 help="user threads only (no kernel threads)") 55parser.add_argument("-p", "--pid", 56 type=positive_int, 57 help="trace this PID only") 58parser.add_argument("-v", "--verbose", action="store_true", 59 help="show raw addresses") 60parser.add_argument("-f", "--folded", action="store_true", 61 help="output folded format") 62parser.add_argument("--stack-storage-size", default=16384, 63 type=positive_nonzero_int, 64 help="the number of unique stack traces that can be stored and " 65 "displayed (default 16384)") 66parser.add_argument("duration", nargs="?", default=99999999, 67 type=positive_nonzero_int, 68 help="duration of trace, in seconds") 69parser.add_argument("-m", "--min-block-time", default=1, 70 type=positive_nonzero_int, 71 help="the amount of time in microseconds over which we " + 72 "store traces (default 1)") 73parser.add_argument("-M", "--max-block-time", default=(1 << 64) - 1, 74 type=positive_nonzero_int, 75 help="the amount of time in microseconds under which we " + 76 "store traces (default U64_MAX)") 77parser.add_argument("--ebpf", action="store_true", 78 help=argparse.SUPPRESS) 79args = parser.parse_args() 80folded = args.folded 81duration = int(args.duration) 82debug = 0 83if args.pid and args.useronly: 84 parser.error("use either -p or -u.") 85 86# signal handler 87def signal_ignore(signal, frame): 88 print() 89 90# define BPF program 91bpf_text = """ 92#include <uapi/linux/ptrace.h> 93#include <linux/sched.h> 94 95#define MINBLOCK_US MINBLOCK_US_VALUEULL 96#define MAXBLOCK_US MAXBLOCK_US_VALUEULL 97 98struct key_t { 99 int w_k_stack_id; 100 char waker[TASK_COMM_LEN]; 101 char target[TASK_COMM_LEN]; 102}; 103BPF_HASH(counts, struct key_t); 104BPF_HASH(start, u32); 105BPF_STACK_TRACE(stack_traces, STACK_STORAGE_SIZE); 106 107static int offcpu_sched_switch() { 108 u64 pid_tgid = bpf_get_current_pid_tgid(); 109 u32 pid = pid_tgid >> 32; 110 u32 tid = (u32)pid_tgid; 111 struct task_struct *p = (struct task_struct *) bpf_get_current_task(); 112 u64 ts; 113 114 if (FILTER) 115 return 0; 116 117 ts = bpf_ktime_get_ns(); 118 start.update(&tid, &ts); 119 return 0; 120} 121 122static int wakeup(ARG0, struct task_struct *p) { 123 u32 pid = p->tgid; 124 u32 tid = p->pid; 125 u64 delta, *tsp, ts; 126 127 tsp = start.lookup(&tid); 128 if (tsp == 0) 129 return 0; // missed start 130 start.delete(&tid); 131 132 if (FILTER) 133 return 0; 134 135 // calculate delta time 136 delta = bpf_ktime_get_ns() - *tsp; 137 delta = delta / 1000; 138 if ((delta < MINBLOCK_US) || (delta > MAXBLOCK_US)) 139 return 0; 140 141 struct key_t key = {}; 142 143 key.w_k_stack_id = stack_traces.get_stackid(ctx, 0); 144 bpf_probe_read_kernel(&key.target, sizeof(key.target), p->comm); 145 bpf_get_current_comm(&key.waker, sizeof(key.waker)); 146 147 counts.atomic_increment(key, delta); 148 return 0; 149} 150""" 151 152bpf_text_kprobe = """ 153int offcpu(struct pt_regs *ctx) { 154 return offcpu_sched_switch(); 155} 156 157int waker(struct pt_regs *ctx, struct task_struct *p) { 158 return wakeup(ctx, p); 159} 160""" 161 162bpf_text_raw_tp = """ 163RAW_TRACEPOINT_PROBE(sched_switch) 164{ 165 // TP_PROTO(bool preempt, struct task_struct *prev, struct task_struct *next) 166 return offcpu_sched_switch(); 167} 168 169RAW_TRACEPOINT_PROBE(sched_wakeup) 170{ 171 // TP_PROTO(struct task_struct *p) 172 struct task_struct *p = (struct task_struct *)ctx->args[0]; 173 return wakeup(ctx, p); 174} 175""" 176 177is_supported_raw_tp = BPF.support_raw_tracepoint() 178if is_supported_raw_tp: 179 bpf_text += bpf_text_raw_tp 180else: 181 bpf_text += bpf_text_kprobe 182 183if args.pid: 184 filter = 'pid != %s' % args.pid 185elif args.useronly: 186 filter = 'p->flags & PF_KTHREAD' 187else: 188 filter = '0' 189bpf_text = bpf_text.replace('FILTER', filter) 190 191if is_supported_raw_tp: 192 arg0 = 'struct bpf_raw_tracepoint_args *ctx' 193else: 194 arg0 = 'struct pt_regs *ctx' 195bpf_text = bpf_text.replace('ARG0', arg0) 196 197# set stack storage size 198bpf_text = bpf_text.replace('STACK_STORAGE_SIZE', str(args.stack_storage_size)) 199bpf_text = bpf_text.replace('MINBLOCK_US_VALUE', str(args.min_block_time)) 200bpf_text = bpf_text.replace('MAXBLOCK_US_VALUE', str(args.max_block_time)) 201 202if debug or args.ebpf: 203 print(bpf_text) 204 if args.ebpf: 205 exit() 206 207# initialize BPF 208b = BPF(text=bpf_text) 209if not is_supported_raw_tp: 210 b.attach_kprobe(event="schedule", fn_name="offcpu") 211 b.attach_kprobe(event="try_to_wake_up", fn_name="waker") 212 matched = b.num_open_kprobes() 213 if matched == 0: 214 print("0 functions traced. Exiting.") 215 exit() 216 217# header 218if not folded: 219 print("Tracing blocked time (us) by kernel stack", end="") 220 if duration < 99999999: 221 print(" for %d secs." % duration) 222 else: 223 print("... Hit Ctrl-C to end.") 224 225# output 226while (1): 227 try: 228 sleep(duration) 229 except KeyboardInterrupt: 230 # as cleanup can take many seconds, trap Ctrl-C: 231 signal.signal(signal.SIGINT, signal_ignore) 232 233 if not folded: 234 print() 235 missing_stacks = 0 236 has_enomem = False 237 counts = b.get_table("counts") 238 stack_traces = b.get_table("stack_traces") 239 for k, v in sorted(counts.items(), key=lambda counts: counts[1].value): 240 # handle get_stackid errors 241 # check for an ENOMEM error 242 if k.w_k_stack_id == -errno.ENOMEM: 243 missing_stacks += 1 244 continue 245 246 waker_kernel_stack = [] if k.w_k_stack_id < 1 else \ 247 list(stack_traces.walk(k.w_k_stack_id))[1:] 248 249 if folded: 250 # print folded stack output 251 line = \ 252 [k.waker] + \ 253 [b.ksym(addr) 254 for addr in reversed(waker_kernel_stack)] + \ 255 [k.target] 256 printb(b"%s %d" % (b";".join(line), v.value)) 257 else: 258 # print default multi-line stack output 259 printb(b" %-16s %s" % (b"target:", k.target)) 260 for addr in waker_kernel_stack: 261 printb(b" %-16x %s" % (addr, b.ksym(addr))) 262 printb(b" %-16s %s" % (b"waker:", k.waker)) 263 print(" %d\n" % v.value) 264 counts.clear() 265 266 if missing_stacks > 0: 267 enomem_str = " Consider increasing --stack-storage-size." 268 print("WARNING: %d stack traces could not be displayed.%s" % 269 (missing_stacks, enomem_str), 270 file=stderr) 271 272 if not folded: 273 print("Detaching...") 274 exit() 275