1#!/usr/bin/env python 2# @lint-avoid-python-3-compatibility-imports 3# 4# cpudist Summarize on- and off-CPU time per task as a histogram. 5# 6# USAGE: cpudist [-h] [-O] [-T] [-m] [-P] [-L] [-p PID] [-I] [-e] [interval] [count] 7# 8# This measures the time a task spends on or off the CPU, and shows this time 9# as a histogram, optionally per-process. 10# 11# By default CPU idle time are excluded by simply excluding PID 0. 12# 13# Copyright 2016 Sasha Goldshtein 14# Licensed under the Apache License, Version 2.0 (the "License") 15# 16# 27-Mar-2022 Rocky Xing Changed to exclude CPU idle time by default. 17# 25-Jul-2022 Rocky Xing Added extension summary support. 18 19from __future__ import print_function 20from bcc import BPF 21from time import sleep, strftime 22import argparse 23 24examples = """examples: 25 cpudist # summarize on-CPU time as a histogram 26 cpudist -O # summarize off-CPU time as a histogram 27 cpudist 1 10 # print 1 second summaries, 10 times 28 cpudist -mT 1 # 1s summaries, milliseconds, and timestamps 29 cpudist -P # show each PID separately 30 cpudist -p 185 # trace PID 185 only 31 cpudist -I # include CPU idle time 32 cpudist -e # show extension summary (average/total/count) 33""" 34parser = argparse.ArgumentParser( 35 description="Summarize on- and off-CPU time per task as a histogram.", 36 formatter_class=argparse.RawDescriptionHelpFormatter, 37 epilog=examples) 38parser.add_argument("-O", "--offcpu", action="store_true", 39 help="measure off-CPU time") 40parser.add_argument("-T", "--timestamp", action="store_true", 41 help="include timestamp on output") 42parser.add_argument("-m", "--milliseconds", action="store_true", 43 help="millisecond histogram") 44parser.add_argument("-P", "--pids", action="store_true", 45 help="print a histogram per process ID") 46parser.add_argument("-L", "--tids", action="store_true", 47 help="print a histogram per thread ID") 48parser.add_argument("-p", "--pid", 49 help="trace this PID only") 50parser.add_argument("-I", "--include-idle", action="store_true", 51 help="include CPU idle time") 52parser.add_argument("-e", "--extension", action="store_true", 53 help="show extension summary (average/total/count)") 54parser.add_argument("interval", nargs="?", default=99999999, 55 help="output interval, in seconds") 56parser.add_argument("count", nargs="?", default=99999999, 57 help="number of outputs") 58parser.add_argument("--ebpf", action="store_true", 59 help=argparse.SUPPRESS) 60args = parser.parse_args() 61countdown = int(args.count) 62debug = 0 63 64bpf_text = """ 65#include <uapi/linux/ptrace.h> 66#include <linux/sched.h> 67""" 68 69if not args.offcpu: 70 bpf_text += "#define ONCPU\n" 71 72bpf_text += """ 73typedef struct entry_key { 74 u32 pid; 75 u32 cpu; 76} entry_key_t; 77 78typedef struct pid_key { 79 u64 id; 80 u64 slot; 81} pid_key_t; 82 83typedef struct ext_val { 84 u64 total; 85 u64 count; 86} ext_val_t; 87 88BPF_HASH(start, entry_key_t, u64, MAX_PID); 89STORAGE 90 91static inline void store_start(u32 tgid, u32 pid, u32 cpu, u64 ts) 92{ 93 if (PID_FILTER) 94 return; 95 96 if (IDLE_FILTER) 97 return; 98 99 entry_key_t entry_key = { .pid = pid, .cpu = (pid == 0 ? cpu : 0xFFFFFFFF) }; 100 start.update(&entry_key, &ts); 101} 102 103static inline void update_hist(u32 tgid, u32 pid, u32 cpu, u64 ts) 104{ 105 if (PID_FILTER) 106 return; 107 108 if (IDLE_FILTER) 109 return; 110 111 entry_key_t entry_key = { .pid = pid, .cpu = (pid == 0 ? cpu : 0xFFFFFFFF) }; 112 u64 *tsp = start.lookup(&entry_key); 113 if (tsp == 0) 114 return; 115 116 if (ts < *tsp) { 117 // Probably a clock issue where the recorded on-CPU event had a 118 // timestamp later than the recorded off-CPU event, or vice versa. 119 return; 120 } 121 u64 delta = ts - *tsp; 122 FACTOR 123 STORE 124} 125 126int sched_switch(struct pt_regs *ctx, struct task_struct *prev) 127{ 128 u64 ts = bpf_ktime_get_ns(); 129 u64 pid_tgid = bpf_get_current_pid_tgid(); 130 u32 tgid = pid_tgid >> 32, pid = pid_tgid; 131 u32 cpu = bpf_get_smp_processor_id(); 132 133 u32 prev_pid = prev->pid; 134 u32 prev_tgid = prev->tgid; 135#ifdef ONCPU 136 update_hist(prev_tgid, prev_pid, cpu, ts); 137#else 138 store_start(prev_tgid, prev_pid, cpu, ts); 139#endif 140 141BAIL: 142#ifdef ONCPU 143 store_start(tgid, pid, cpu, ts); 144#else 145 update_hist(tgid, pid, cpu, ts); 146#endif 147 148 return 0; 149} 150""" 151 152if args.pid: 153 bpf_text = bpf_text.replace('PID_FILTER', 'tgid != %s' % args.pid) 154else: 155 bpf_text = bpf_text.replace('PID_FILTER', '0') 156 157# set idle filter 158idle_filter = 'pid == 0' 159if args.include_idle: 160 idle_filter = '0' 161bpf_text = bpf_text.replace('IDLE_FILTER', idle_filter) 162 163if args.milliseconds: 164 bpf_text = bpf_text.replace('FACTOR', 'delta /= 1000000;') 165 label = "msecs" 166else: 167 bpf_text = bpf_text.replace('FACTOR', 'delta /= 1000;') 168 label = "usecs" 169 170storage_str = "" 171store_str = "" 172 173if args.pids or args.tids: 174 section = "pid" 175 pid = "tgid" 176 if args.tids: 177 pid = "pid" 178 section = "tid" 179 storage_str += "BPF_HISTOGRAM(dist, pid_key_t, MAX_PID);" 180 store_str += """ 181 pid_key_t key = {.id = """ + pid + """, .slot = bpf_log2l(delta)}; 182 dist.increment(key); 183 """ 184else: 185 section = "" 186 storage_str += "BPF_HISTOGRAM(dist);" 187 store_str += "dist.atomic_increment(bpf_log2l(delta));" 188 189if args.extension: 190 storage_str += "BPF_ARRAY(extension, ext_val_t, 1);" 191 store_str += """ 192 u32 index = 0; 193 ext_val_t *ext_val = extension.lookup(&index); 194 if (ext_val) { 195 lock_xadd(&ext_val->total, delta); 196 lock_xadd(&ext_val->count, 1); 197 } 198 """ 199 200bpf_text = bpf_text.replace("STORAGE", storage_str) 201bpf_text = bpf_text.replace("STORE", store_str) 202 203if debug or args.ebpf: 204 print(bpf_text) 205 if args.ebpf: 206 exit() 207 208max_pid = int(open("/proc/sys/kernel/pid_max").read()) 209 210b = BPF(text=bpf_text, cflags=["-DMAX_PID=%d" % max_pid]) 211b.attach_kprobe(event_re="^finish_task_switch$|^finish_task_switch\.isra\.\d$", 212 fn_name="sched_switch") 213 214print("Tracing %s-CPU time... Hit Ctrl-C to end." % 215 ("off" if args.offcpu else "on")) 216 217exiting = 0 if args.interval else 1 218dist = b.get_table("dist") 219if args.extension: 220 extension = b.get_table("extension") 221while (1): 222 try: 223 sleep(int(args.interval)) 224 except KeyboardInterrupt: 225 exiting = 1 226 227 print() 228 if args.timestamp: 229 print("%-8s\n" % strftime("%H:%M:%S"), end="") 230 231 def pid_to_comm(pid): 232 try: 233 comm = open("/proc/%d/comm" % pid, "r").read() 234 return "%d %s" % (pid, comm) 235 except IOError: 236 return str(pid) 237 238 dist.print_log2_hist(label, section, section_print_fn=pid_to_comm) 239 240 if args.extension: 241 total = extension[0].total 242 count = extension[0].count 243 if count > 0: 244 print("\navg = %ld %s, total: %ld %s, count: %ld\n" % 245 (total / count, label, total, label, count)) 246 extension.clear() 247 248 dist.clear() 249 250 countdown -= 1 251 if exiting or countdown == 0: 252 exit() 253