1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2 // Copyright (c) 2020 Anton Protopopov
3 //
4 // Based on syscount(8) from BCC by Sasha Goldshtein
5 #include <unistd.h>
6 #include <signal.h>
7 #include <fcntl.h>
8 #include <time.h>
9 #include <unistd.h>
10 #include <argp.h>
11 #include <bpf/bpf.h>
12 #include "syscount.h"
13 #include "syscount.skel.h"
14 #include "errno_helpers.h"
15 #include "syscall_helpers.h"
16 #include "btf_helpers.h"
17 #include "trace_helpers.h"
18
/* Userspace copy of one map entry: the count, total_ns and comm fields of
 * data_t plus the map key, kept in a single record so that sorting the
 * values keeps every key attached to its statistics. */
struct data_ext_t {
	__u64 count;			/* copied from data_t.count */
	__u64 total_ns;			/* copied from data_t.total_ns; printed as us or ms */
	char comm[TASK_COMM_LEN];	/* copied from data_t.comm; printed with -P */
	__u32 key;			/* map key: printed as a PID with -P, otherwise
					 * resolved through syscall_name() */
};
27
28
/* Print a printf-style diagnostic to stderr. */
#define warn(...) fprintf(stderr, __VA_ARGS__)
30
/* Version string and bug-report address; glibc's argp reads these two
 * globals, which is why they are not static. */
const char *argp_program_version = "syscount 0.1";
const char *argp_program_bug_address =
	"https://github.com/iovisor/bcc/tree/master/libbpf-tools";

/* Text handed to argp through the .doc member of the parser definition. */
static const char argp_program_doc[] =
"\nsyscount: summarize syscall counts and latencies\n"
"\n"
"EXAMPLES:\n"
" syscount # print top 10 syscalls by count every second\n"
" syscount -p $(pidof dd) # look only at a particular process\n"
" syscount -L # measure and sort output by latency\n"
" syscount -P # group statistics by pid, not by syscall\n"
" syscount -x -i 5 # count only failed syscalls\n"
" syscount -e ENOENT -i 5 # count only syscalls failed with a given errno\n"
" syscount -c CG # Trace process under cgroupsPath CG\n";
46
/* Command-line options understood by parse_arg(). The table ends with an
 * all-zero entry. */
static const struct argp_option opts[] = {
	{ "verbose", 'v', NULL, 0, "Verbose debug output" },
	{ "pid", 'p', "PID", 0, "Process PID to trace" },
	{ "interval", 'i', "INTERVAL", 0, "Print summary at this interval"
				" (seconds), 0 for infinite wait (default)" },
	{ "duration", 'd', "DURATION", 0, "Total tracing duration (seconds)" },
	{ "top", 'T', "TOP", 0, "Print only the top syscalls (default 10)" },
	{ "cgroup", 'c', "/sys/fs/cgroup/unified/<CG>", 0, "Trace process in cgroup path"},
	{ "failures", 'x', NULL, 0, "Trace only failed syscalls" },
	{ "latency", 'L', NULL, 0, "Collect syscall latency" },
	{ "milliseconds", 'm', NULL, 0, "Display latency in milliseconds"
				" (default: microseconds)" },
	{ "process", 'P', NULL, 0, "Count by process and not by syscall" },
	/* the continuation starts with a space so the two literals do not
	 * run together as "error(numeric ..." */
	{ "errno", 'e', "ERRNO", 0, "Trace only syscalls that return this error"
				" (numeric or EPERM, etc.)" },
	{ "list", 'l', NULL, 0, "Print list of recognized syscalls and exit" },
	{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
	{},
};
66
/* Settings collected from the command line by parse_arg(). Everything not
 * listed in the initializer starts out as zero/false/NULL. */
static struct env {
	bool list_syscalls;	/* -l: print the recognized syscalls and exit */
	bool milliseconds;	/* -m: show latency in ms instead of us */
	bool failures;		/* -x: only failed syscalls */
	bool verbose;		/* -v: also print libbpf debug messages */
	bool latency;		/* -L: collect latency and sort by it */
	bool process;		/* -P: aggregate by process instead of syscall */
	int filter_errno;	/* -e: only this errno (0 = no filter) */
	int interval;		/* -i: seconds between reports (0 = only at the end) */
	int duration;		/* -d: total run time in seconds (0 = until Ctrl+C) */
	int top;		/* -T: number of rows to print */
	pid_t pid;		/* -p: only this PID (0 = no filter) */
	char *cgroupspath;	/* -c: cgroup path; points at the argv string */
	bool cg;		/* set together with cgroupspath */
} env = {
	.top = 10,
};
84
/*
 * Parse arg as a base-10 integer in the inclusive range [min, max].
 *
 * On success the value is stored through ret (when ret is not NULL) and 0 is
 * returned. -1 is returned when arg holds no digits, has anything left after
 * the number (e.g. "10abc"), makes strtol() fail, or is out of range. Only a
 * strtol() failure prints a diagnostic; the caller reports the other cases.
 */
static int get_int(const char *arg, int *ret, int min, int max)
{
	char *end;
	long val;

	errno = 0;
	val = strtol(arg, &end, 10);
	if (errno) {
		fprintf(stderr, "strtol: %s: %s\n", arg, strerror(errno));
		return -1;
	}
	/* no digits at all, or trailing characters after the number */
	if (end == arg || *end != '\0')
		return -1;
	if (val < min || val > max)
		return -1;
	if (ret)
		*ret = (int)val;
	return 0;
}
102
libbpf_print_fn(enum libbpf_print_level level,const char * format,va_list args)103 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
104 {
105 if (level == LIBBPF_DEBUG && !env.verbose)
106 return 0;
107
108 return vfprintf(stderr, format, args);
109 }
110
compar_count(const void * dx,const void * dy)111 static int compar_count(const void *dx, const void *dy)
112 {
113 __u64 x = ((struct data_ext_t *) dx)->count;
114 __u64 y = ((struct data_ext_t *) dy)->count;
115 return x > y ? -1 : !(x == y);
116 }
117
compar_latency(const void * dx,const void * dy)118 static int compar_latency(const void *dx, const void *dy)
119 {
120 __u64 x = ((struct data_ext_t *) dx)->total_ns;
121 __u64 y = ((struct data_ext_t *) dy)->total_ns;
122 return x > y ? -1 : !(x == y);
123 }
124
agg_col(struct data_ext_t * val,char * buf,size_t size)125 static const char *agg_col(struct data_ext_t *val, char *buf, size_t size)
126 {
127 if (env.process) {
128 snprintf(buf, size, "%-6u %-15s", val->key, val->comm);
129 } else {
130 syscall_name(val->key, buf, size);
131 }
132 return buf;
133 }
134
agg_colname(void)135 static const char *agg_colname(void)
136 {
137 return (env.process) ? "PID COMM" : "SYSCALL";
138 }
139
time_colname(void)140 static const char *time_colname(void)
141 {
142 return (env.milliseconds) ? "TIME (ms)" : "TIME (us)";
143 }
144
/* Print the column headers of the latency report. */
static void print_latency_header(void)
{
	const char *name = agg_colname();
	const char *unit = time_colname();

	printf("%-22s %8s %16s\n", name, "COUNT", unit);
}
149
/* Print the column headers of the count-only report. */
static void print_count_header(void)
{
	const char *name = agg_colname();

	printf("%-22s %8s\n", name, "COUNT");
}
154
print_latency(struct data_ext_t * vals,size_t count)155 static void print_latency(struct data_ext_t *vals, size_t count)
156 {
157 double div = env.milliseconds ? 1000000.0 : 1000.0;
158 char buf[2 * TASK_COMM_LEN];
159 int i;
160
161 print_latency_header();
162 for (i = 0; i < count && i < env.top; i++)
163 printf("%-22s %8llu %16.3lf\n",
164 agg_col(&vals[i], buf, sizeof(buf)),
165 vals[i].count, vals[i].total_ns / div);
166 printf("\n");
167 }
168
print_count(struct data_ext_t * vals,size_t count)169 static void print_count(struct data_ext_t *vals, size_t count)
170 {
171 char buf[2 * TASK_COMM_LEN];
172 int i;
173
174 print_count_header();
175 for (i = 0; i < count && i < env.top; i++)
176 printf("%-22s %8llu\n",
177 agg_col(&vals[i], buf, sizeof(buf)), vals[i].count);
178 printf("\n");
179 }
180
/* Print the current local time as "[HH:MM:SS]" on its own line. If the time
 * cannot be converted, a diagnostic goes to stderr instead. */
static void print_timestamp(void)
{
	time_t now = time(NULL);
	struct tm tm;

	if (!localtime_r(&now, &tm)) {
		warn("localtime_r: %s\n", strerror(errno));
		return;
	}

	printf("[%02d:%02d:%02d]\n", tm.tm_hour, tm.tm_min, tm.tm_sec);
}
191
/* Whether read_vals() should try bpf_map_lookup_and_delete_batch(). Starts
 * out true and is cleared by read_vals() once a batch read fails with
 * EINVAL. */
static bool batch_map_ops = true; /* hope for the best */
193
/*
 * Drain up to *count entries from the map behind fd with
 * bpf_map_lookup_and_delete_batch() and copy them into vals.
 *
 * On success returns true and sets *count to the number of entries stored.
 * On failure returns false with errno holding the error of the failed batch
 * call; read_vals() relies on that to detect EINVAL and switch to the
 * element-by-element path. EINVAL is the only failure not reported on
 * stderr.
 *
 * *count must be non-zero: it sizes the two scratch arrays.
 */
static bool read_vals_batch(int fd, struct data_ext_t *vals, __u32 *count)
{
	struct data_t orig_vals[*count];
	void *in = NULL, *out;
	__u32 i, n, n_read = 0;
	__u32 keys[*count];
	int saved_errno;
	int err = 0;

	while (n_read < *count && !err) {
		n = *count - n_read;
		err = bpf_map_lookup_and_delete_batch(fd, &in, &out,
			keys + n_read, orig_vals + n_read, &n, NULL);
		if (err) {
			/* Decide and report from errno: depending on the
			 * libbpf mode err is either -1 or -errno, so -err
			 * is not always a valid error number. */
			saved_errno = errno;
			if (saved_errno != ENOENT) {
				/* we want to propagate EINVAL upper, so that
				 * the batch_map_ops flag is set to false */
				if (saved_errno != EINVAL)
					warn("bpf_map_lookup_and_delete_batch: %s\n",
					     strerror(saved_errno));
				/* printing may have changed errno */
				errno = saved_errno;
				return false;
			}
			/* ENOENT is not a failure: keep what this call
			 * returned; the loop condition then stops us. */
		}
		n_read += n;
		in = out;
	}

	for (i = 0; i < n_read; i++) {
		vals[i].count = orig_vals[i].count;
		vals[i].total_ns = orig_vals[i].total_ns;
		vals[i].key = keys[i];
		strncpy(vals[i].comm, orig_vals[i].comm, TASK_COMM_LEN);
	}

	*count = n_read;
	return true;
}
228
/*
 * Read and remove up to *count entries from the map behind fd, storing them
 * in vals.
 *
 * The batch operation is tried first. If it fails with EINVAL, batch mode is
 * switched off for the rest of the run and the map is walked element by
 * element instead.
 *
 * Returns true on success with *count set to the number of entries stored,
 * false on error (already reported on stderr). With a NULL vals or count,
 * or a zero *count, nothing is read and true is returned.
 */
static bool read_vals(int fd, struct data_ext_t *vals, __u32 *count)
{
	__u32 keys[MAX_ENTRIES];
	struct data_t val;
	/* NOTE(review): iteration starts from key 0xffffffff, which assumes
	 * that value is never a live key, so that the first
	 * bpf_map_get_next_key() call yields the first element. */
	__u32 key = -1;
	__u32 next_key;
	__u32 max, n_keys = 0, n_vals = 0, j;
	int err;

	/* check before the batch path, which sizes arrays from *count */
	if (!vals || !count || !*count)
		return true;

	if (batch_map_ops) {
		bool ok = read_vals_batch(fd, vals, count);

		if (ok || errno != EINVAL)
			return ok;
		/* fall back to a racy variant */
		batch_map_ops = false;
	}

	/* never collect more keys than the local array can hold */
	max = *count < MAX_ENTRIES ? *count : MAX_ENTRIES;

	while (n_keys < max) {
		err = bpf_map_get_next_key(fd, &key, &next_key);
		if (err && errno != ENOENT) {
			warn("failed to get next key: %s\n", strerror(errno));
			return false;
		}
		if (err)
			break;	/* ENOENT: no more keys */
		key = keys[n_keys++] = next_key;
	}

	for (j = 0; j < n_keys; j++) {
		err = bpf_map_lookup_elem(fd, &keys[j], &val);
		if (err && errno != ENOENT) {
			warn("failed to lookup element: %s\n", strerror(errno));
			return false;
		}
		if (err)
			continue;	/* entry is gone: val holds nothing valid */

		vals[n_vals].count = val.count;
		vals[n_vals].total_ns = val.total_ns;
		vals[n_vals].key = keys[j];
		memcpy(vals[n_vals].comm, val.comm, TASK_COMM_LEN);
		/* keep keys[] aligned with vals[] for the delete pass;
		 * n_vals <= j, so no unread key is overwritten */
		keys[n_vals] = keys[j];
		n_vals++;
	}

	/* There is a race here: system calls which are represented by keys
	 * above and happened between lookup and delete will be ignored. The
	 * batch path above does not go through these separate lookup and
	 * delete passes; this fallback is only used when it is rejected
	 * with EINVAL. */

	for (j = 0; j < n_vals; j++) {
		err = bpf_map_delete_elem(fd, &keys[j]);
		if (err) {
			warn("failed to delete element: %s\n", strerror(errno));
			return false;
		}
	}

	*count = n_vals;
	return true;
}
290
parse_arg(int key,char * arg,struct argp_state * state)291 static error_t parse_arg(int key, char *arg, struct argp_state *state)
292 {
293 int number;
294 int err;
295
296 switch (key) {
297 case 'h':
298 argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
299 break;
300 case 'v':
301 env.verbose = true;
302 break;
303 case 'x':
304 env.failures = true;
305 break;
306 case 'L':
307 env.latency = true;
308 break;
309 case 'm':
310 env.milliseconds = true;
311 break;
312 case 'P':
313 env.process = true;
314 break;
315 case 'p':
316 err = get_int(arg, &env.pid, 1, INT_MAX);
317 if (err) {
318 warn("invalid PID: %s\n", arg);
319 argp_usage(state);
320 }
321 break;
322 case 'i':
323 err = get_int(arg, &env.interval, 0, INT_MAX);
324 if (err) {
325 warn("invalid INTERVAL: %s\n", arg);
326 argp_usage(state);
327 }
328 break;
329 case 'd':
330 err = get_int(arg, &env.duration, 1, INT_MAX);
331 if (err) {
332 warn("invalid DURATION: %s\n", arg);
333 argp_usage(state);
334 }
335 break;
336 case 'T':
337 err = get_int(arg, &env.top, 1, INT_MAX);
338 if (err) {
339 warn("invalid TOP: %s\n", arg);
340 argp_usage(state);
341 }
342 break;
343 case 'c':
344 env.cgroupspath = arg;
345 env.cg = true;
346 break;
347 case 'e':
348 err = get_int(arg, &number, 1, INT_MAX);
349 if (err) {
350 number = errno_by_name(arg);
351 if (number < 0) {
352 warn("invalid errno: %s (bad, or can't "
353 "parse dynamically; consider using "
354 "numeric value and/or installing the "
355 "errno program from moreutils)\n", arg);
356 argp_usage(state);
357 }
358 }
359 env.filter_errno = number;
360 break;
361 case 'l':
362 env.list_syscalls = true;
363 break;
364 default:
365 return ARGP_ERR_UNKNOWN;
366 }
367 return 0;
368 }
369
/* Run flag of the main loop: cleared by the SIGINT handler, and by main()
 * once the requested duration has elapsed. */
static volatile sig_atomic_t hang_on = 1;

/* SIGINT handler: ask the main loop to stop. */
void sig_int(int signo)
{
	(void)signo;	/* the signal number is not needed */
	hang_on = 0;
}
376
main(int argc,char ** argv)377 int main(int argc, char **argv)
378 {
379 LIBBPF_OPTS(bpf_object_open_opts, open_opts);
380 void (*print)(struct data_ext_t *, size_t);
381 int (*compar)(const void *, const void *);
382 static const struct argp argp = {
383 .options = opts,
384 .parser = parse_arg,
385 .doc = argp_program_doc,
386 };
387 struct data_ext_t vals[MAX_ENTRIES];
388 struct syscount_bpf *obj;
389 int seconds = 0;
390 __u32 count;
391 int err;
392 int idx, cg_map_fd;
393 int cgfd = -1;
394
395 init_syscall_names();
396
397 err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
398 if (err)
399 goto free_names;
400
401 if (env.list_syscalls) {
402 list_syscalls();
403 goto free_names;
404 }
405
406 libbpf_set_print(libbpf_print_fn);
407
408 err = ensure_core_btf(&open_opts);
409 if (err) {
410 fprintf(stderr, "failed to fetch necessary BTF for CO-RE: %s\n", strerror(-err));
411 return 1;
412 }
413
414 obj = syscount_bpf__open_opts(&open_opts);
415 if (!obj) {
416 warn("failed to open BPF object\n");
417 err = 1;
418 goto free_names;
419 }
420
421 if (env.pid)
422 obj->rodata->filter_pid = env.pid;
423 if (env.failures)
424 obj->rodata->filter_failed = true;
425 if (env.latency)
426 obj->rodata->measure_latency = true;
427 if (env.process)
428 obj->rodata->count_by_process = true;
429 if (env.filter_errno)
430 obj->rodata->filter_errno = env.filter_errno;
431 if (env.cg)
432 obj->rodata->filter_cg = env.cg;
433
434 err = syscount_bpf__load(obj);
435 if (err) {
436 warn("failed to load BPF object: %s\n", strerror(-err));
437 goto cleanup_obj;
438 }
439
440 /* update cgroup path fd to map */
441 if (env.cg) {
442 idx = 0;
443 cg_map_fd = bpf_map__fd(obj->maps.cgroup_map);
444 cgfd = open(env.cgroupspath, O_RDONLY);
445 if (cgfd < 0) {
446 fprintf(stderr, "Failed opening Cgroup path: %s", env.cgroupspath);
447 goto cleanup_obj;
448 }
449 if (bpf_map_update_elem(cg_map_fd, &idx, &cgfd, BPF_ANY)) {
450 fprintf(stderr, "Failed adding target cgroup to map");
451 goto cleanup_obj;
452 }
453 }
454
455 obj->links.sys_exit = bpf_program__attach(obj->progs.sys_exit);
456 if (!obj->links.sys_exit) {
457 err = -errno;
458 warn("failed to attach sys_exit program: %s\n", strerror(-err));
459 goto cleanup_obj;
460 }
461 if (env.latency) {
462 obj->links.sys_enter = bpf_program__attach(obj->progs.sys_enter);
463 if (!obj->links.sys_enter) {
464 err = -errno;
465 warn("failed to attach sys_enter programs: %s\n",
466 strerror(-err));
467 goto cleanup_obj;
468 }
469 }
470
471 if (signal(SIGINT, sig_int) == SIG_ERR) {
472 warn("can't set signal handler: %s\n", strerror(errno));
473 goto cleanup_obj;
474 }
475
476 compar = env.latency ? compar_latency : compar_count;
477 print = env.latency ? print_latency : print_count;
478
479 printf("Tracing syscalls, printing top %d... Ctrl+C to quit.\n", env.top);
480 while (hang_on) {
481 sleep(env.interval ?: 1);
482 if (env.duration) {
483 seconds += env.interval ?: 1;
484 if (seconds >= env.duration)
485 hang_on = 0;
486 }
487 if (hang_on && !env.interval)
488 continue;
489
490 count = MAX_ENTRIES;
491 if (!read_vals(bpf_map__fd(obj->maps.data), vals, &count))
492 break;
493 if (!count)
494 continue;
495
496 qsort(vals, count, sizeof(vals[0]), compar);
497 print_timestamp();
498 print(vals, count);
499 }
500
501 cleanup_obj:
502 syscount_bpf__destroy(obj);
503 free_names:
504 free_syscall_names();
505 cleanup_core_btf(&open_opts);
506 if (cgfd > 0)
507 close(cgfd);
508
509 return err != 0;
510 }
511