1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2 // Copyright (c) 2021 Wenbo Zhang
3 //
4 // Based on offcputime(8) from BCC by Brendan Gregg.
5 // 19-Mar-2021 Wenbo Zhang Created this.
6 #include <argp.h>
7 #include <signal.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <unistd.h>
11 #include <time.h>
12 #include <bpf/libbpf.h>
13 #include <bpf/bpf.h>
14 #include "offcputime.h"
15 #include "offcputime.skel.h"
16 #include "trace_helpers.h"
17
/* Runtime configuration; filled in by parse_arg() and consumed by main(). */
static struct env {
	pid_t pid;			/* -p; -1 when not given */
	pid_t tid;			/* -t; -1 when not given */
	bool user_threads_only;		/* -u */
	bool kernel_threads_only;	/* -k */
	int stack_storage_size;		/* --stack-storage-size */
	int perf_max_stack_depth;	/* --perf-max-stack-depth */
	__u64 min_block_time;		/* -m, microseconds per the option help */
	__u64 max_block_time;		/* -M, microseconds per the option help */
	long state;			/* --state; -1 when not given */
	int duration;			/* positional argument, seconds */
	bool verbose;			/* -v */
} env = {
	/* filters: -1 is the "not given" default */
	.pid = -1,
	.tid = -1,
	.state = -1,
	.user_threads_only = false,
	.kernel_threads_only = false,

	/* stack map sizing */
	.perf_max_stack_depth = 127,
	.stack_storage_size = 1024,

	/* block-time window; the upper bound defaults to the largest __u64 */
	.min_block_time = 1,
	.max_block_time = (__u64)-1,

	.duration = 99999999,
	.verbose = false,
};
40
/* argp global: program version string. */
const char *argp_program_version = "offcputime 0.1";
/* argp global: bug-report address. */
const char *argp_program_bug_address =
	"https://github.com/iovisor/bcc/tree/master/libbpf-tools";
/*
 * Help text passed to argp via the .doc member of main()'s struct argp.
 * The USAGE synopsis lists -t TID so that it agrees with the "tid" entry in
 * opts[] and with the -t example further down.
 */
const char argp_program_doc[] =
"Summarize off-CPU time by stack trace.\n"
"\n"
"USAGE: offcputime [--help] [-p PID | -u | -k] [-t TID] [-m MIN-BLOCK-TIME] "
"[-M MAX-BLOCK-TIME] [--state] [--perf-max-stack-depth] [--stack-storage-size] "
"[duration]\n"
"EXAMPLES:\n"
" offcputime # trace off-CPU stack time until Ctrl-C\n"
" offcputime 5 # trace for 5 seconds only\n"
" offcputime -m 1000 # trace only events that last more than 1000 usec\n"
" offcputime -M 10000 # trace only events that last less than 10000 usec\n"
" offcputime -p 185 # only trace threads for PID 185\n"
" offcputime -t 188 # only trace thread 188\n"
" offcputime -u # only trace user threads (no kernel)\n"
" offcputime -k # only trace kernel threads (no user)\n";
59
/*
 * Keys for the long-only options. They are used as the key field in opts[]
 * and as case labels in parse_arg().
 */
enum {
	OPT_PERF_MAX_STACK_DEPTH = 1,	/* --perf-max-stack-depth */
	OPT_STACK_STORAGE_SIZE = 2,	/* --stack-storage-size */
	OPT_STATE = 3,			/* --state */
};
63
64 static const struct argp_option opts[] = {
65 { "pid", 'p', "PID", 0, "Trace this PID only" },
66 { "tid", 't', "TID", 0, "Trace this TID only" },
67 { "user-threads-only", 'u', NULL, 0,
68 "User threads only (no kernel threads)" },
69 { "kernel-threads-only", 'k', NULL, 0,
70 "Kernel threads only (no user threads)" },
71 { "perf-max-stack-depth", OPT_PERF_MAX_STACK_DEPTH,
72 "PERF-MAX-STACK-DEPTH", 0, "the limit for both kernel and user stack traces (default 127)" },
73 { "stack-storage-size", OPT_STACK_STORAGE_SIZE, "STACK-STORAGE-SIZE", 0,
74 "the number of unique stack traces that can be stored and displayed (default 1024)" },
75 { "min-block-time", 'm', "MIN-BLOCK-TIME", 0,
76 "the amount of time in microseconds over which we store traces (default 1)" },
77 { "max-block-time", 'M', "MAX-BLOCK-TIME", 0,
78 "the amount of time in microseconds under which we store traces (default U64_MAX)" },
79 { "state", OPT_STATE, "STATE", 0, "filter on this thread state bitmask (eg, 2 == TASK_UNINTERRUPTIBLE) see include/linux/sched.h" },
80 { "verbose", 'v', NULL, 0, "Verbose debug output" },
81 { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
82 {},
83 };
84
parse_arg(int key,char * arg,struct argp_state * state)85 static error_t parse_arg(int key, char *arg, struct argp_state *state)
86 {
87 static int pos_args;
88
89 switch (key) {
90 case 'h':
91 argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
92 break;
93 case 'v':
94 env.verbose = true;
95 break;
96 case 'p':
97 errno = 0;
98 env.pid = strtol(arg, NULL, 10);
99 if (errno) {
100 fprintf(stderr, "invalid PID: %s\n", arg);
101 argp_usage(state);
102 }
103 break;
104 case 't':
105 errno = 0;
106 env.tid = strtol(arg, NULL, 10);
107 if (errno || env.tid <= 0) {
108 fprintf(stderr, "Invalid TID: %s\n", arg);
109 argp_usage(state);
110 }
111 break;
112 case 'u':
113 env.user_threads_only = true;
114 break;
115 case 'k':
116 env.kernel_threads_only = true;
117 break;
118 case OPT_PERF_MAX_STACK_DEPTH:
119 errno = 0;
120 env.perf_max_stack_depth = strtol(arg, NULL, 10);
121 if (errno) {
122 fprintf(stderr, "invalid perf max stack depth: %s\n", arg);
123 argp_usage(state);
124 }
125 break;
126 case OPT_STACK_STORAGE_SIZE:
127 errno = 0;
128 env.stack_storage_size = strtol(arg, NULL, 10);
129 if (errno) {
130 fprintf(stderr, "invalid stack storage size: %s\n", arg);
131 argp_usage(state);
132 }
133 break;
134 case 'm':
135 errno = 0;
136 env.min_block_time = strtoll(arg, NULL, 10);
137 if (errno) {
138 fprintf(stderr, "Invalid min block time (in us): %s\n", arg);
139 argp_usage(state);
140 }
141 break;
142 case 'M':
143 errno = 0;
144 env.max_block_time = strtoll(arg, NULL, 10);
145 if (errno) {
146 fprintf(stderr, "Invalid min block time (in us): %s\n", arg);
147 argp_usage(state);
148 }
149 break;
150 case OPT_STATE:
151 errno = 0;
152 env.state = strtol(arg, NULL, 10);
153 if (errno || env.state < 0 || env.state > 2) {
154 fprintf(stderr, "Invalid task state: %s\n", arg);
155 argp_usage(state);
156 }
157 break;
158 case ARGP_KEY_ARG:
159 if (pos_args++) {
160 fprintf(stderr,
161 "Unrecognized positional argument: %s\n", arg);
162 argp_usage(state);
163 }
164 errno = 0;
165 env.duration = strtol(arg, NULL, 10);
166 if (errno || env.duration <= 0) {
167 fprintf(stderr, "Invalid duration (in s): %s\n", arg);
168 argp_usage(state);
169 }
170 break;
171 default:
172 return ARGP_ERR_UNKNOWN;
173 }
174 return 0;
175 }
176
libbpf_print_fn(enum libbpf_print_level level,const char * format,va_list args)177 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
178 {
179 if (level == LIBBPF_DEBUG && !env.verbose)
180 return 0;
181 return vfprintf(stderr, format, args);
182 }
183
/*
 * Deliberately empty SIGINT handler, installed by main(). Having a handler
 * means Ctrl-C interrupts main()'s sleep() rather than terminating the
 * process, so the collected data still gets printed.
 */
static void sig_handler(int sig)
{
	(void)sig;
}
187
print_map(struct ksyms * ksyms,struct syms_cache * syms_cache,struct offcputime_bpf * obj)188 static void print_map(struct ksyms *ksyms, struct syms_cache *syms_cache,
189 struct offcputime_bpf *obj)
190 {
191 struct key_t lookup_key = {}, next_key;
192 const struct ksym *ksym;
193 const struct syms *syms;
194 const struct sym *sym;
195 int err, i, ifd, sfd;
196 unsigned long *ip;
197 struct val_t val;
198 char *dso_name;
199 unsigned long dso_offset;
200 int idx;
201
202 ip = calloc(env.perf_max_stack_depth, sizeof(*ip));
203 if (!ip) {
204 fprintf(stderr, "failed to alloc ip\n");
205 return;
206 }
207
208 ifd = bpf_map__fd(obj->maps.info);
209 sfd = bpf_map__fd(obj->maps.stackmap);
210 while (!bpf_map_get_next_key(ifd, &lookup_key, &next_key)) {
211 idx = 0;
212
213 err = bpf_map_lookup_elem(ifd, &next_key, &val);
214 if (err < 0) {
215 fprintf(stderr, "failed to lookup info: %d\n", err);
216 goto cleanup;
217 }
218 lookup_key = next_key;
219 if (val.delta == 0)
220 continue;
221 if (bpf_map_lookup_elem(sfd, &next_key.kern_stack_id, ip) != 0) {
222 fprintf(stderr, " [Missed Kernel Stack]\n");
223 goto print_ustack;
224 }
225
226 for (i = 0; i < env.perf_max_stack_depth && ip[i]; i++) {
227 ksym = ksyms__map_addr(ksyms, ip[i]);
228 if (!env.verbose) {
229 printf(" %s\n", ksym ? ksym->name : "unknown");
230 } else {
231 if (ksym)
232 printf(" #%-2d 0x%lx %s+0x%lx\n", idx++, ip[i], ksym->name, ip[i] - ksym->addr);
233 else
234 printf(" #%-2d 0x%lx [unknown]\n", idx++, ip[i]);
235 }
236 }
237
238 print_ustack:
239 if (next_key.user_stack_id == -1)
240 goto skip_ustack;
241
242 if (bpf_map_lookup_elem(sfd, &next_key.user_stack_id, ip) != 0) {
243 fprintf(stderr, " [Missed User Stack]\n");
244 goto skip_ustack;
245 }
246
247 syms = syms_cache__get_syms(syms_cache, next_key.tgid);
248 if (!syms) {
249 if (!env.verbose) {
250 fprintf(stderr, "failed to get syms\n");
251 } else {
252 for (i = 0; i < env.perf_max_stack_depth && ip[i]; i++)
253 printf(" #%-2d 0x%016lx [unknown]\n", idx++, ip[i]);
254 }
255 goto skip_ustack;
256 }
257 for (i = 0; i < env.perf_max_stack_depth && ip[i]; i++) {
258 if (!env.verbose) {
259 sym = syms__map_addr(syms, ip[i]);
260 if (sym)
261 printf(" %s\n", sym->name);
262 else
263 printf(" [unknown]\n");
264 } else {
265 sym = syms__map_addr_dso(syms, ip[i], &dso_name, &dso_offset);
266 printf(" #%-2d 0x%016lx", idx++, ip[i]);
267 if (sym)
268 printf(" %s+0x%lx", sym->name, sym->offset);
269 if (dso_name)
270 printf(" (%s+0x%lx)", dso_name, dso_offset);
271 printf("\n");
272 }
273 }
274
275 skip_ustack:
276 printf(" %-16s %s (%d)\n", "-", val.comm, next_key.pid);
277 printf(" %lld\n\n", val.delta);
278 }
279
280 cleanup:
281 free(ip);
282 }
283
main(int argc,char ** argv)284 int main(int argc, char **argv)
285 {
286 static const struct argp argp = {
287 .options = opts,
288 .parser = parse_arg,
289 .doc = argp_program_doc,
290 };
291 struct syms_cache *syms_cache = NULL;
292 struct ksyms *ksyms = NULL;
293 struct offcputime_bpf *obj;
294 int err;
295
296 err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
297 if (err)
298 return err;
299 if (env.user_threads_only && env.kernel_threads_only) {
300 fprintf(stderr, "user_threads_only and kernel_threads_only cannot be used together.\n");
301 return 1;
302 }
303 if (env.min_block_time >= env.max_block_time) {
304 fprintf(stderr, "min_block_time should be smaller than max_block_time\n");
305 return 1;
306 }
307
308 libbpf_set_print(libbpf_print_fn);
309
310 obj = offcputime_bpf__open();
311 if (!obj) {
312 fprintf(stderr, "failed to open BPF object\n");
313 return 1;
314 }
315
316 /* initialize global data (filtering options) */
317 obj->rodata->targ_tgid = env.pid;
318 obj->rodata->targ_pid = env.tid;
319 obj->rodata->user_threads_only = env.user_threads_only;
320 obj->rodata->kernel_threads_only = env.kernel_threads_only;
321 obj->rodata->state = env.state;
322 obj->rodata->min_block_ns = env.min_block_time;
323 obj->rodata->max_block_ns = env.max_block_time;
324
325 bpf_map__set_value_size(obj->maps.stackmap,
326 env.perf_max_stack_depth * sizeof(unsigned long));
327 bpf_map__set_max_entries(obj->maps.stackmap, env.stack_storage_size);
328
329 err = offcputime_bpf__load(obj);
330 if (err) {
331 fprintf(stderr, "failed to load BPF programs\n");
332 goto cleanup;
333 }
334 ksyms = ksyms__load();
335 if (!ksyms) {
336 fprintf(stderr, "failed to load kallsyms\n");
337 goto cleanup;
338 }
339 syms_cache = syms_cache__new(0);
340 if (!syms_cache) {
341 fprintf(stderr, "failed to create syms_cache\n");
342 goto cleanup;
343 }
344 err = offcputime_bpf__attach(obj);
345 if (err) {
346 fprintf(stderr, "failed to attach BPF programs\n");
347 goto cleanup;
348 }
349
350 signal(SIGINT, sig_handler);
351
352 /*
353 * We'll get sleep interrupted when someone presses Ctrl-C (which will
354 * be "handled" with noop by sig_handler).
355 */
356 sleep(env.duration);
357
358 print_map(ksyms, syms_cache, obj);
359
360 cleanup:
361 offcputime_bpf__destroy(obj);
362 syms_cache__free(syms_cache);
363 ksyms__free(ksyms);
364 return err != 0;
365 }
366