xref: /aosp_15_r20/external/bcc/libbpf-tools/biosnoop.c (revision 387f9dfdfa2baef462e92476d413c7bc2470293e)
1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2 // Copyright (c) 2020 Wenbo Zhang
3 //
4 // Based on biosnoop(8) from BCC by Brendan Gregg.
5 // 29-Jun-2020   Wenbo Zhang   Created this.
#include <argp.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/resource.h>
#include <bpf/libbpf.h>
#include <bpf/bpf.h>
#include "blk_types.h"
#include "biosnoop.h"
#include "biosnoop.skel.h"
#include "trace_helpers.h"
20 
/* Page count handed to perf_buffer__new() when the event buffer is created. */
#define PERF_BUFFER_PAGES	16
/* Timeout, in milliseconds, of each perf_buffer__poll() call in the main loop. */
#define PERF_POLL_TIMEOUT_MS	100

/*
 * Exit request flag: set to 1 by the SIGINT handler and tested by the
 * polling loop.  Static storage makes the initial value 0.
 */
static volatile sig_atomic_t exiting;
25 
26 static struct env {
27 	__u64 min_lat_ms;
28 	char *disk;
29 	int duration;
30 	bool timestamp;
31 	bool queued;
32 	bool verbose;
33 	char *cgroupspath;
34 	bool cg;
35 } env = {};
36 
37 static volatile __u64 start_ts;
38 
/* Globals that the argp library looks up by name for --version / --help. */
const char *argp_program_version = "biosnoop 0.1";
const char *argp_program_bug_address =
	"https://github.com/iovisor/bcc/tree/master/libbpf-tools";

/*
 * Help text passed to argp through the .doc field of the parser in main().
 * The USAGE line lists every option parse_arg() accepts, including the
 * optional positional duration, so that it agrees with the EXAMPLES below.
 */
const char argp_program_doc[] =
"Trace block I/O.\n"
"\n"
"USAGE: biosnoop [--help] [-Q] [-t] [-v] [-d DISK] [-c CG] [-m MIN] [duration]\n"
"\n"
"EXAMPLES:\n"
"    biosnoop              # trace all block I/O\n"
"    biosnoop -Q           # include OS queued time in I/O time\n"
"    biosnoop -t           # use timestamps instead\n"
"    biosnoop 10           # trace for 10 seconds only\n"
"    biosnoop -d sdc       # trace sdc only\n"
"    biosnoop -c CG        # Trace process under cgroupsPath CG\n"
"    biosnoop -m 1         # trace for slower than 1ms\n";
55 
/*
 * Option table consumed by argp.  Entries without .arg are plain flags;
 * the table ends with an all-zero entry.
 */
static const struct argp_option opts[] = {
	{
		.name = "queued",
		.key = 'Q',
		.doc = "Include OS queued time in I/O time",
	},
	{
		.name = "disk",
		.key = 'd',
		.arg = "DISK",
		.doc = "Trace this disk only",
	},
	{
		.name = "verbose",
		.key = 'v',
		.doc = "Verbose debug output",
	},
	{
		.name = "cgroup",
		.key = 'c',
		.arg = "/sys/fs/cgroup/unified/CG",
		.doc = "Trace process in cgroup path",
	},
	{
		.name = "min",
		.key = 'm',
		.arg = "MIN",
		.doc = "Min latency to trace, in ms",
	},
	{
		.name = "timestamp",
		.key = 't',
		.doc = "Include timestamp on output",
	},
	{
		/* short-only -h, hidden from the generated option list */
		.key = 'h',
		.flags = OPTION_HIDDEN,
		.doc = "Show the full help",
	},
	{ .name = NULL },
};
66 
parse_arg(int key,char * arg,struct argp_state * state)67 static error_t parse_arg(int key, char *arg, struct argp_state *state)
68 {
69 	static int pos_args;
70 
71 	switch (key) {
72 	case 'h':
73 		argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
74 		break;
75 	case 'v':
76 		env.verbose = true;
77 		break;
78 	case 'Q':
79 		env.queued = true;
80 		break;
81 	case 'c':
82 		env.cg = true;
83 		env.cgroupspath = arg;
84 		break;
85 	case 'd':
86 		env.disk = arg;
87 		if (strlen(arg) + 1 > DISK_NAME_LEN) {
88 			fprintf(stderr, "invaild disk name: too long\n");
89 			argp_usage(state);
90 		}
91 		break;
92 	case 'm':
93 		errno = 0;
94 		env.min_lat_ms = strtoll(arg, NULL, 10);
95 		if (errno) {
96 			fprintf(stderr, "invalid latency (in us): %s\n", arg);
97 			argp_usage(state);
98 		}
99 		break;
100 	case 't':
101 		env.timestamp = true;
102 		break;
103 	case ARGP_KEY_ARG:
104 		if (pos_args++) {
105 			fprintf(stderr,
106 				"unrecognized positional argument: %s\n", arg);
107 			argp_usage(state);
108 		}
109 		errno = 0;
110 		env.duration = strtoll(arg, NULL, 10);
111 		if (errno || env.duration <= 0) {
112 			fprintf(stderr, "invalid delay (in us): %s\n", arg);
113 			argp_usage(state);
114 		}
115 		break;
116 	default:
117 		return ARGP_ERR_UNKNOWN;
118 	}
119 	return 0;
120 }
121 
libbpf_print_fn(enum libbpf_print_level level,const char * format,va_list args)122 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
123 {
124 	if (level == LIBBPF_DEBUG && !env.verbose)
125 		return 0;
126 	return vfprintf(stderr, format, args);
127 }
128 
sig_int(int signo)129 static void sig_int(int signo)
130 {
131 	exiting = 1;
132 }
133 
blk_fill_rwbs(char * rwbs,unsigned int op)134 static void blk_fill_rwbs(char *rwbs, unsigned int op)
135 {
136 	int i = 0;
137 
138 	if (op & REQ_PREFLUSH)
139 		rwbs[i++] = 'F';
140 
141 	switch (op & REQ_OP_MASK) {
142 	case REQ_OP_WRITE:
143 	case REQ_OP_WRITE_SAME:
144 		rwbs[i++] = 'W';
145 		break;
146 	case REQ_OP_DISCARD:
147 		rwbs[i++] = 'D';
148 		break;
149 	case REQ_OP_SECURE_ERASE:
150 		rwbs[i++] = 'D';
151 		rwbs[i++] = 'E';
152 		break;
153 	case REQ_OP_FLUSH:
154 		rwbs[i++] = 'F';
155 		break;
156 	case REQ_OP_READ:
157 		rwbs[i++] = 'R';
158 		break;
159 	default:
160 		rwbs[i++] = 'N';
161 	}
162 
163 	if (op & REQ_FUA)
164 		rwbs[i++] = 'F';
165 	if (op & REQ_RAHEAD)
166 		rwbs[i++] = 'A';
167 	if (op & REQ_SYNC)
168 		rwbs[i++] = 'S';
169 	if (op & REQ_META)
170 		rwbs[i++] = 'M';
171 
172 	rwbs[i] = '\0';
173 }
174 
175 static struct partitions *partitions;
176 
handle_event(void * ctx,int cpu,void * data,__u32 data_sz)177 void handle_event(void *ctx, int cpu, void *data, __u32 data_sz)
178 {
179 	const struct partition *partition;
180 	const struct event *e = data;
181 	char rwbs[RWBS_LEN];
182 	struct timespec ct;
183 	struct tm *tm;
184 	char ts[32];
185 
186 	if (env.timestamp) {
187 		/* Since `bpf_ktime_get_boot_ns` requires at least 5.8 kernel,
188 		 * so get time from usespace instead */
189 		clock_gettime(CLOCK_REALTIME, &ct);
190 		tm = localtime(&ct.tv_sec);
191 		strftime(ts, sizeof(ts), "%H:%M:%S", tm);
192 		printf("%-8s.%03ld ", ts, ct.tv_nsec / 1000000);
193 	} else {
194 		if (!start_ts) {
195 			start_ts = e->ts;
196 		}
197 		printf("%-11.6f ",(e->ts - start_ts) / 1000000000.0);
198 	}
199 	blk_fill_rwbs(rwbs, e->cmd_flags);
200 	partition = partitions__get_by_dev(partitions, e->dev);
201 	printf("%-14.14s %-7d %-7s %-4s %-10lld %-7d ",
202 		e->comm, e->pid, partition ? partition->name : "Unknown", rwbs,
203 		e->sector, e->len);
204 	if (env.queued)
205 		printf("%7.3f ", e->qdelta != -1 ?
206 			e->qdelta / 1000000.0 : -1);
207 	printf("%7.3f\n", e->delta / 1000000.0);
208 }
209 
/*
 * Perf-buffer lost-sample callback: report on stderr how many events were
 * dropped on @cpu.
 */
void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt)
{
	(void)ctx;	/* no callback context is registered */

	fprintf(stderr, "lost %llu events on CPU #%d\n",
		lost_cnt, cpu);
}
214 
blk_account_io_set_attach_target(struct biosnoop_bpf * obj)215 static void blk_account_io_set_attach_target(struct biosnoop_bpf *obj)
216 {
217 	if (fentry_can_attach("blk_account_io_start", NULL))
218 		bpf_program__set_attach_target(obj->progs.blk_account_io_start,
219 					       0, "blk_account_io_start");
220 	else
221 		bpf_program__set_attach_target(obj->progs.blk_account_io_start,
222 					       0, "__blk_account_io_start");
223 }
224 
main(int argc,char ** argv)225 int main(int argc, char **argv)
226 {
227 	const struct partition *partition;
228 	static const struct argp argp = {
229 		.options = opts,
230 		.parser = parse_arg,
231 		.doc = argp_program_doc,
232 	};
233 	struct perf_buffer *pb = NULL;
234 	struct ksyms *ksyms = NULL;
235 	struct biosnoop_bpf *obj;
236 	__u64 time_end = 0;
237 	int err;
238 	int idx, cg_map_fd;
239 	int cgfd = -1;
240 
241 	err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
242 	if (err)
243 		return err;
244 
245 	libbpf_set_print(libbpf_print_fn);
246 
247 	obj = biosnoop_bpf__open();
248 	if (!obj) {
249 		fprintf(stderr, "failed to open BPF object\n");
250 		return 1;
251 	}
252 
253 	partitions = partitions__load();
254 	if (!partitions) {
255 		fprintf(stderr, "failed to load partitions info\n");
256 		goto cleanup;
257 	}
258 
259 	/* initialize global data (filtering options) */
260 	if (env.disk) {
261 		partition = partitions__get_by_name(partitions, env.disk);
262 		if (!partition) {
263 			fprintf(stderr, "invaild partition name: not exist\n");
264 			goto cleanup;
265 		}
266 		obj->rodata->filter_dev = true;
267 		obj->rodata->targ_dev = partition->dev;
268 	}
269 	obj->rodata->targ_queued = env.queued;
270 	obj->rodata->filter_cg = env.cg;
271 	obj->rodata->min_ns = env.min_lat_ms * 1000000;
272 
273 	if (tracepoint_exists("block", "block_io_start"))
274 		bpf_program__set_autoload(obj->progs.blk_account_io_start, false);
275 	else {
276 		bpf_program__set_autoload(obj->progs.block_io_start, false);
277 		blk_account_io_set_attach_target(obj);
278 	}
279 
280 	ksyms = ksyms__load();
281 	if (!ksyms) {
282 		fprintf(stderr, "failed to load kallsyms\n");
283 		goto cleanup;
284 	}
285 	if (!ksyms__get_symbol(ksyms, "blk_account_io_merge_bio"))
286 		bpf_program__set_autoload(obj->progs.blk_account_io_merge_bio, false);
287 
288 	if (!env.queued)
289 		bpf_program__set_autoload(obj->progs.block_rq_insert, false);
290 
291 	err = biosnoop_bpf__load(obj);
292 	if (err) {
293 		fprintf(stderr, "failed to load BPF object: %d\n", err);
294 		goto cleanup;
295 	}
296 
297 	/* update cgroup path fd to map */
298 	if (env.cg) {
299 		idx = 0;
300 		cg_map_fd = bpf_map__fd(obj->maps.cgroup_map);
301 		cgfd = open(env.cgroupspath, O_RDONLY);
302 		if (cgfd < 0) {
303 			fprintf(stderr, "Failed opening Cgroup path: %s\n", env.cgroupspath);
304 			goto cleanup;
305 		}
306 		if (bpf_map_update_elem(cg_map_fd, &idx, &cgfd, BPF_ANY)) {
307 			fprintf(stderr, "Failed adding target cgroup to map\n");
308 			goto cleanup;
309 		}
310 	}
311 
312 	err = biosnoop_bpf__attach(obj);
313 	if (err) {
314 		fprintf(stderr, "failed to attach BPF programs: %d\n", err);
315 		goto cleanup;
316 	}
317 
318 	pb = perf_buffer__new(bpf_map__fd(obj->maps.events), PERF_BUFFER_PAGES,
319 			      handle_event, handle_lost_events, NULL, NULL);
320 	if (!pb) {
321 		err = -errno;
322 		fprintf(stderr, "failed to open perf buffer: %d\n", err);
323 		goto cleanup;
324 	}
325 
326 	if (env.timestamp) {
327 		printf("%-12s ", "TIMESTAMP");
328 	} else {
329 		printf("%-11s ", "TIME(s)");
330 	}
331 	printf("%-14s %-7s %-7s %-4s %-10s %-7s ",
332 		"COMM", "PID", "DISK", "T", "SECTOR", "BYTES");
333 	if (env.queued)
334 		printf("%7s ", "QUE(ms)");
335 	printf("%7s\n", "LAT(ms)");
336 
337 	/* setup duration */
338 	if (env.duration)
339 		time_end = get_ktime_ns() + env.duration * NSEC_PER_SEC;
340 
341 	if (signal(SIGINT, sig_int) == SIG_ERR) {
342 		fprintf(stderr, "can't set signal handler: %s\n", strerror(errno));
343 		err = 1;
344 		goto cleanup;
345 	}
346 
347 	/* main: poll */
348 	while (!exiting) {
349 		err = perf_buffer__poll(pb, PERF_POLL_TIMEOUT_MS);
350 		if (err < 0 && err != -EINTR) {
351 			fprintf(stderr, "error polling perf buffer: %s\n", strerror(-err));
352 			goto cleanup;
353 		}
354 		/* reset err to return 0 if exiting */
355 		err = 0;
356 		if (env.duration && get_ktime_ns() > time_end)
357 			break;
358 	}
359 
360 cleanup:
361 	perf_buffer__free(pb);
362 	biosnoop_bpf__destroy(obj);
363 	ksyms__free(ksyms);
364 	partitions__free(partitions);
365 	if (cgfd > 0)
366 		close(cgfd);
367 
368 	return err != 0;
369 }
370