xref: /aosp_15_r20/external/bcc/libbpf-tools/syscount.c (revision 387f9dfdfa2baef462e92476d413c7bc2470293e)
1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2 // Copyright (c) 2020 Anton Protopopov
3 //
4 // Based on syscount(8) from BCC by Sasha Goldshtein
5 #include <unistd.h>
6 #include <signal.h>
7 #include <fcntl.h>
8 #include <time.h>
9 #include <unistd.h>
10 #include <argp.h>
11 #include <bpf/bpf.h>
12 #include "syscount.h"
13 #include "syscount.skel.h"
14 #include "errno_helpers.h"
15 #include "syscall_helpers.h"
16 #include "btf_helpers.h"
17 #include "trace_helpers.h"
18 
/* One output row: the counters read from the BPF map (count, total_ns and
 * comm are copied from struct data_t) together with the map key they were
 * stored under, so that the key travels with its counters when the array is
 * sorted. */
struct data_ext_t {
	__u64 count;			/* copied from data_t.count; the COUNT column */
	__u64 total_ns;			/* copied from data_t.total_ns; the TIME column */
	char comm[TASK_COMM_LEN];	/* copied from data_t.comm; printed with -P */
	__u32 key;			/* map key: printed as the PID with -P,
					 * otherwise passed to syscall_name() */
};
27 
28 
/* Print a printf-style diagnostic to stderr. */
#define warn(...) fprintf(stderr, __VA_ARGS__)
30 
/* Version string and bug-report address. These are the global names that
 * glibc's <argp.h> declares for its built-in --version and --help output. */
const char *argp_program_version = "syscount 0.1";
const char *argp_program_bug_address =
	"https://github.com/iovisor/bcc/tree/master/libbpf-tools";
/* Help text handed to argp through the .doc member of the parser in main().
 * The default interval is 0, in which case main() prints a single summary
 * when tracing stops, so the first example must not promise periodic
 * output. */
static const char argp_program_doc[] =
"\nsyscount: summarize syscall counts and latencies\n"
"\n"
"EXAMPLES:\n"
"    syscount                 # print top 10 syscalls by count on Ctrl-C\n"
"    syscount -p $(pidof dd)  # look only at a particular process\n"
"    syscount -L              # measure and sort output by latency\n"
"    syscount -P              # group statistics by pid, not by syscall\n"
"    syscount -x -i 5         # count only failed syscalls\n"
"    syscount -e ENOENT -i 5  # count only syscalls failed with a given errno\n"
"    syscount -c CG           # Trace process under cgroupsPath CG\n";
46 
/* Command-line options understood by parse_arg(). Each entry is
 * { long name, short key, argument name (NULL for flags), flags, help text };
 * the table ends with an all-zero entry. */
static const struct argp_option opts[] = {
	{ "verbose", 'v', NULL, 0, "Verbose debug output" },
	{ "pid", 'p', "PID", 0, "Process PID to trace" },
	{ "interval", 'i', "INTERVAL", 0, "Print summary at this interval"
				" (seconds), 0 for infinite wait (default)" },
	{ "duration", 'd', "DURATION", 0, "Total tracing duration (seconds)" },
	{ "top", 'T', "TOP", 0, "Print only the top syscalls (default 10)" },
	{ "cgroup", 'c', "/sys/fs/cgroup/unified/<CG>", 0, "Trace process in cgroup path"},
	{ "failures", 'x', NULL, 0, "Trace only failed syscalls" },
	{ "latency", 'L', NULL, 0, "Collect syscall latency" },
	{ "milliseconds", 'm', NULL, 0, "Display latency in milliseconds"
					" (default: microseconds)" },
	{ "process", 'P', NULL, 0, "Count by process and not by syscall" },
	/* The two literals below are concatenated, so the second one has to
	 * start with the separating space. */
	{ "errno", 'e', "ERRNO", 0, "Trace only syscalls that return this error"
				 " (numeric or EPERM, etc.)" },
	{ "list", 'l', NULL, 0, "Print list of recognized syscalls and exit" },
	{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
	{},
};
66 
/* Command-line configuration, filled in by parse_arg(). Members not named in
 * the initializer start out as zero/false/NULL. */
static struct env {
	bool list_syscalls;	/* -l: print the syscall list and exit */
	bool milliseconds;	/* -m: show latency in ms instead of us */
	bool failures;		/* -x: sets the BPF filter_failed flag */
	bool verbose;		/* -v: let libbpf debug messages through */
	bool latency;		/* -L: measure latency and sort by it */
	bool process;		/* -P: aggregate per process, not per syscall */
	int filter_errno;	/* -e: errno to filter on; 0 means not set */
	int interval;		/* -i: seconds between summaries; with 0 the
				 * summary is printed only when tracing stops */
	int duration;		/* -d: total run time in seconds; 0 means not set */
	int top;		/* -T: maximum number of rows printed */
	pid_t pid;		/* -p: PID to trace; 0 means not set */
	char *cgroupspath;	/* -c: cgroup path, stored as passed by argp (not copied) */
	bool cg;		/* true once -c was given */
} env = {
	.top = 10,
};
84 
/*
 * Parse a base-10 integer from @arg and range-check it.
 *
 * @arg: NUL-terminated string to parse
 * @ret: where to store the value on success; may be NULL to validate only
 * @min: smallest accepted value (inclusive)
 * @max: largest accepted value (inclusive)
 *
 * Returns 0 on success. Returns -1 if strtol() reports an error (a message is
 * printed to stderr in that case), if @arg contains no digits, if anything
 * follows the number, or if the value lies outside [@min, @max]. *@ret is
 * left untouched on failure.
 */
static int get_int(const char *arg, int *ret, int min, int max)
{
	char *end;
	long val;

	errno = 0;
	val = strtol(arg, &end, 10);
	if (errno) {
		fprintf(stderr, "strtol: %s: %s\n", arg, strerror(errno));
		return -1;
	}
	/* end == arg: nothing was consumed; *end != '\0': trailing junk such
	 * as "10x", which must not be silently read as 10 */
	if (end == arg || *end != '\0')
		return -1;
	if (val < min || val > max)
		return -1;
	if (ret)
		*ret = (int)val;
	return 0;
}
102 
libbpf_print_fn(enum libbpf_print_level level,const char * format,va_list args)103 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
104 {
105 	if (level == LIBBPF_DEBUG && !env.verbose)
106 		return 0;
107 
108 	return vfprintf(stderr, format, args);
109 }
110 
compar_count(const void * dx,const void * dy)111 static int compar_count(const void *dx, const void *dy)
112 {
113 	__u64 x = ((struct data_ext_t *) dx)->count;
114 	__u64 y = ((struct data_ext_t *) dy)->count;
115 	return x > y ? -1 : !(x == y);
116 }
117 
compar_latency(const void * dx,const void * dy)118 static int compar_latency(const void *dx, const void *dy)
119 {
120 	__u64 x = ((struct data_ext_t *) dx)->total_ns;
121 	__u64 y = ((struct data_ext_t *) dy)->total_ns;
122 	return x > y ? -1 : !(x == y);
123 }
124 
agg_col(struct data_ext_t * val,char * buf,size_t size)125 static const char *agg_col(struct data_ext_t *val, char *buf, size_t size)
126 {
127 	if (env.process) {
128 		snprintf(buf, size, "%-6u %-15s", val->key, val->comm);
129 	} else {
130 		syscall_name(val->key, buf, size);
131 	}
132 	return buf;
133 }
134 
agg_colname(void)135 static const char *agg_colname(void)
136 {
137 	return (env.process) ? "PID    COMM" : "SYSCALL";
138 }
139 
time_colname(void)140 static const char *time_colname(void)
141 {
142 	return (env.milliseconds) ? "TIME (ms)" : "TIME (us)";
143 }
144 
/* Print the heading line of the latency table (key, COUNT, TIME). */
static void print_latency_header(void)
{
	const char *key_heading = agg_colname();
	const char *time_heading = time_colname();

	printf("%-22s %8s %16s\n", key_heading, "COUNT", time_heading);
}
149 
/* Print the heading line of the count-only table (key, COUNT). */
static void print_count_header(void)
{
	const char *key_heading = agg_colname();

	printf("%-22s %8s\n", key_heading, "COUNT");
}
154 
print_latency(struct data_ext_t * vals,size_t count)155 static void print_latency(struct data_ext_t *vals, size_t count)
156 {
157 	double div = env.milliseconds ? 1000000.0 : 1000.0;
158 	char buf[2 * TASK_COMM_LEN];
159 	int i;
160 
161 	print_latency_header();
162 	for (i = 0; i < count && i < env.top; i++)
163 		printf("%-22s %8llu %16.3lf\n",
164 		       agg_col(&vals[i], buf, sizeof(buf)),
165 		       vals[i].count, vals[i].total_ns / div);
166 	printf("\n");
167 }
168 
print_count(struct data_ext_t * vals,size_t count)169 static void print_count(struct data_ext_t *vals, size_t count)
170 {
171 	char buf[2 * TASK_COMM_LEN];
172 	int i;
173 
174 	print_count_header();
175 	for (i = 0; i < count && i < env.top; i++)
176 		printf("%-22s %8llu\n",
177 		       agg_col(&vals[i], buf, sizeof(buf)), vals[i].count);
178 	printf("\n");
179 }
180 
/* Print the current local time as "[HH:MM:SS]" on a line of its own. If
 * localtime_r() fails, a warning is written to stderr instead. */
static void print_timestamp(void)
{
	time_t now = time(NULL);
	struct tm tm;

	if (localtime_r(&now, &tm))
		printf("[%02d:%02d:%02d]\n", tm.tm_hour, tm.tm_min, tm.tm_sec);
	else
		/* terminate the line so the next message starts on a new one */
		warn("localtime_r: %s\n", strerror(errno));
}
191 
/* Whether read_vals() should try the batch lookup-and-delete path first.
 * read_vals() clears this permanently the first time the batch call fails
 * with EINVAL. */
static bool batch_map_ops = true; /* hope for the best */
193 
/*
 * Drain the BPF map @fd with bpf_map_lookup_and_delete_batch().
 *
 * @fd:    map file descriptor
 * @vals:  output array with room for at least *@count entries
 * @count: in: maximum number of entries to read (must be non-zero, it sizes
 *         the temporary arrays); out: number of entries stored in @vals
 *
 * Returns true on success. Returns false when a batch call fails with
 * anything other than ENOENT; errno then still holds the error of that call,
 * which read_vals() tests for EINVAL to decide whether to fall back to the
 * non-batch path.
 */
static bool read_vals_batch(int fd, struct data_ext_t *vals, __u32 *count)
{
	struct data_t orig_vals[*count];
	void *in = NULL, *out;
	__u32 i, n, n_read = 0;
	__u32 keys[*count];
	int err = 0;

	while (n_read < *count && !err) {
		n = *count - n_read;
		err = bpf_map_lookup_and_delete_batch(fd, &in, &out,
				keys + n_read, orig_vals + n_read, &n, NULL);
		if (err && errno != ENOENT) {
			/* warn() may change errno, but the caller inspects
			 * it, so remember it and put it back */
			int saved_errno = errno;

			/* we want to propagate EINVAL upper, so that
			 * the batch_map_ops flag is set to false */
			if (saved_errno != EINVAL)
				warn("bpf_map_lookup_and_delete_batch: %s\n",
				     strerror(saved_errno));
			errno = saved_errno;
			return false;
		}
		/* ENOENT is not fatal: keep what this call returned; the
		 * non-zero err ends the loop */
		n_read += n;
		in = out;
	}

	for (i = 0; i < n_read; i++) {
		vals[i].count = orig_vals[i].count;
		vals[i].total_ns = orig_vals[i].total_ns;
		vals[i].key = keys[i];
		/* same fixed-size copy as the non-batch path in read_vals() */
		memcpy(vals[i].comm, orig_vals[i].comm, TASK_COMM_LEN);
	}

	*count = n_read;
	return true;
}
228 
/*
 * Read and remove the entries of the BPF map @fd.
 *
 * @fd:    map file descriptor
 * @vals:  output array with room for at least *@count entries
 * @count: in: maximum number of entries to read; out: number of entries
 *         stored in @vals
 *
 * The batch path (read_vals_batch()) is tried first while batch_map_ops is
 * set. If it fails with EINVAL, batch_map_ops is cleared and this and all
 * later calls use the key-by-key path below.
 *
 * Returns true on success (including "nothing to do" when @vals or @count is
 * NULL or *@count is 0), false after a map operation failed; a message has
 * been printed in that case.
 */
static bool read_vals(int fd, struct data_ext_t *vals, __u32 *count)
{
	__u32 keys[MAX_ENTRIES];
	struct data_t val;
	__u32 key = -1;
	__u32 next_key;
	__u32 limit, n_keys = 0, n_vals = 0, j;
	int err;

	/* Validate before the batch path: it dereferences count and sizes
	 * its temporary arrays with *count. */
	if (!vals || !count || !*count)
		return true;

	if (batch_map_ops) {
		bool ok = read_vals_batch(fd, vals, count);

		if (ok || errno != EINVAL)
			return ok;
		/* fall back to a racy variant */
		batch_map_ops = false;
	}

	/* keys[] holds MAX_ENTRIES elements, never collect more than that */
	limit = *count < MAX_ENTRIES ? *count : MAX_ENTRIES;

	/* Collect the keys first; iteration starts from key (__u32)-1. */
	while (n_keys < limit) {
		err = bpf_map_get_next_key(fd, &key, &next_key);
		if (err && errno != ENOENT) {
			warn("failed to get next key: %s\n", strerror(errno));
			return false;
		} else if (err) {
			break;
		}
		key = keys[n_keys++] = next_key;
	}

	for (j = 0; j < n_keys; j++) {
		err = bpf_map_lookup_elem(fd, &keys[j], &val);
		if (err && errno != ENOENT) {
			warn("failed to lookup element: %s\n", strerror(errno));
			return false;
		}
		/* The entry is gone, so val was not filled in: skip it rather
		 * than report stale or uninitialized data. */
		if (err)
			continue;
		vals[n_vals].count = val.count;
		vals[n_vals].total_ns = val.total_ns;
		vals[n_vals].key = keys[j];
		memcpy(vals[n_vals].comm, val.comm, TASK_COMM_LEN);
		/* compact keys[] so that it lists exactly the entries read */
		keys[n_vals] = keys[j];
		n_vals++;
	}

	/* There is a race here: system calls which are represented by keys
	 * above and happened between lookup and delete will be ignored.  The
	 * batch path above does lookup and delete in one call; this path is
	 * only used when that call is rejected with EINVAL. */

	for (j = 0; j < n_vals; j++) {
		err = bpf_map_delete_elem(fd, &keys[j]);
		if (err) {
			warn("failed to delete element: %s\n", strerror(errno));
			return false;
		}
	}

	*count = n_vals;
	return true;
}
290 
parse_arg(int key,char * arg,struct argp_state * state)291 static error_t parse_arg(int key, char *arg, struct argp_state *state)
292 {
293 	int number;
294 	int err;
295 
296 	switch (key) {
297 	case 'h':
298 		argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
299 		break;
300 	case 'v':
301 		env.verbose = true;
302 		break;
303 	case 'x':
304 		env.failures = true;
305 		break;
306 	case 'L':
307 		env.latency = true;
308 		break;
309 	case 'm':
310 		env.milliseconds = true;
311 		break;
312 	case 'P':
313 		env.process = true;
314 		break;
315 	case 'p':
316 		err = get_int(arg, &env.pid, 1, INT_MAX);
317 		if (err) {
318 			warn("invalid PID: %s\n", arg);
319 			argp_usage(state);
320 		}
321 		break;
322 	case 'i':
323 		err = get_int(arg, &env.interval, 0, INT_MAX);
324 		if (err) {
325 			warn("invalid INTERVAL: %s\n", arg);
326 			argp_usage(state);
327 		}
328 		break;
329 	case 'd':
330 		err = get_int(arg, &env.duration, 1, INT_MAX);
331 		if (err) {
332 			warn("invalid DURATION: %s\n", arg);
333 			argp_usage(state);
334 		}
335 		break;
336 	case 'T':
337 		err = get_int(arg, &env.top, 1, INT_MAX);
338 		if (err) {
339 			warn("invalid TOP: %s\n", arg);
340 			argp_usage(state);
341 		}
342 		break;
343 	case 'c':
344 		env.cgroupspath = arg;
345 		env.cg = true;
346 		break;
347 	case 'e':
348 		err = get_int(arg, &number, 1, INT_MAX);
349 		if (err) {
350 			number = errno_by_name(arg);
351 			if (number < 0) {
352 				warn("invalid errno: %s (bad, or can't "
353 				     "parse dynamically; consider using "
354 				     "numeric value and/or installing the "
355 				     "errno program from moreutils)\n", arg);
356 				argp_usage(state);
357 			}
358 		}
359 		env.filter_errno = number;
360 		break;
361 	case 'l':
362 		env.list_syscalls = true;
363 		break;
364 	default:
365 		return ARGP_ERR_UNKNOWN;
366 	}
367 	return 0;
368 }
369 
/* Loop-control flag of main(): non-zero while tracing should go on. */
static volatile sig_atomic_t hang_on = 1;

/* Signal handler (installed for SIGINT in main()): request the main loop to
 * stop. */
void sig_int(int signo)
{
	(void)signo;	/* required by the handler signature, not used */
	hang_on = 0;
}
376 
main(int argc,char ** argv)377 int main(int argc, char **argv)
378 {
379 	LIBBPF_OPTS(bpf_object_open_opts, open_opts);
380 	void (*print)(struct data_ext_t *, size_t);
381 	int (*compar)(const void *, const void *);
382 	static const struct argp argp = {
383 		.options = opts,
384 		.parser = parse_arg,
385 		.doc = argp_program_doc,
386 	};
387 	struct data_ext_t vals[MAX_ENTRIES];
388 	struct syscount_bpf *obj;
389 	int seconds = 0;
390 	__u32 count;
391 	int err;
392 	int idx, cg_map_fd;
393 	int cgfd = -1;
394 
395 	init_syscall_names();
396 
397 	err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
398 	if (err)
399 		goto free_names;
400 
401 	if (env.list_syscalls) {
402 		list_syscalls();
403 		goto free_names;
404 	}
405 
406 	libbpf_set_print(libbpf_print_fn);
407 
408 	err = ensure_core_btf(&open_opts);
409 	if (err) {
410 		fprintf(stderr, "failed to fetch necessary BTF for CO-RE: %s\n", strerror(-err));
411 		return 1;
412 	}
413 
414 	obj = syscount_bpf__open_opts(&open_opts);
415 	if (!obj) {
416 		warn("failed to open BPF object\n");
417 		err = 1;
418 		goto free_names;
419 	}
420 
421 	if (env.pid)
422 		obj->rodata->filter_pid = env.pid;
423 	if (env.failures)
424 		obj->rodata->filter_failed = true;
425 	if (env.latency)
426 		obj->rodata->measure_latency = true;
427 	if (env.process)
428 		obj->rodata->count_by_process = true;
429 	if (env.filter_errno)
430 		obj->rodata->filter_errno = env.filter_errno;
431 	if (env.cg)
432 		obj->rodata->filter_cg = env.cg;
433 
434 	err = syscount_bpf__load(obj);
435 	if (err) {
436 		warn("failed to load BPF object: %s\n", strerror(-err));
437 		goto cleanup_obj;
438 	}
439 
440 	/* update cgroup path fd to map */
441 	if (env.cg) {
442 		idx = 0;
443 		cg_map_fd = bpf_map__fd(obj->maps.cgroup_map);
444 		cgfd = open(env.cgroupspath, O_RDONLY);
445 		if (cgfd < 0) {
446 			fprintf(stderr, "Failed opening Cgroup path: %s", env.cgroupspath);
447 			goto cleanup_obj;
448 		}
449 		if (bpf_map_update_elem(cg_map_fd, &idx, &cgfd, BPF_ANY)) {
450 			fprintf(stderr, "Failed adding target cgroup to map");
451 			goto cleanup_obj;
452 		}
453 	}
454 
455 	obj->links.sys_exit = bpf_program__attach(obj->progs.sys_exit);
456 	if (!obj->links.sys_exit) {
457 		err = -errno;
458 		warn("failed to attach sys_exit program: %s\n", strerror(-err));
459 		goto cleanup_obj;
460 	}
461 	if (env.latency) {
462 		obj->links.sys_enter = bpf_program__attach(obj->progs.sys_enter);
463 		if (!obj->links.sys_enter) {
464 			err = -errno;
465 			warn("failed to attach sys_enter programs: %s\n",
466 			     strerror(-err));
467 			goto cleanup_obj;
468 		}
469 	}
470 
471 	if (signal(SIGINT, sig_int) == SIG_ERR) {
472 		warn("can't set signal handler: %s\n", strerror(errno));
473 		goto cleanup_obj;
474 	}
475 
476 	compar = env.latency ? compar_latency : compar_count;
477 	print = env.latency ? print_latency : print_count;
478 
479 	printf("Tracing syscalls, printing top %d... Ctrl+C to quit.\n", env.top);
480 	while (hang_on) {
481 		sleep(env.interval ?: 1);
482 		if (env.duration) {
483 			seconds += env.interval ?: 1;
484 			if (seconds >= env.duration)
485 				hang_on = 0;
486 		}
487 		if (hang_on && !env.interval)
488 			continue;
489 
490 		count = MAX_ENTRIES;
491 		if (!read_vals(bpf_map__fd(obj->maps.data), vals, &count))
492 			break;
493 		if (!count)
494 			continue;
495 
496 		qsort(vals, count, sizeof(vals[0]), compar);
497 		print_timestamp();
498 		print(vals, count);
499 	}
500 
501 cleanup_obj:
502 	syscount_bpf__destroy(obj);
503 free_names:
504 	free_syscall_names();
505 	cleanup_core_btf(&open_opts);
506 	if (cgfd > 0)
507 		close(cgfd);
508 
509 	return err != 0;
510 }
511