1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2
3 /*
4 * biotop Trace block I/O by process.
5 * Copyright (c) 2022 Francis Laniel <[email protected]>
6 *
7 * Based on biotop(8) from BCC by Brendan Gregg.
8 * 03-Mar-2022 Francis Laniel Created this.
9 */
10 #ifndef _GNU_SOURCE
11 #define _GNU_SOURCE
12 #endif
#include <argp.h>
#include <errno.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include <bpf/libbpf.h>
#include <bpf/bpf.h>
#include "biotop.h"
#include "biotop.skel.h"
#include "compat.h"
#include "trace_helpers.h"
28
29 #define warn(...) fprintf(stderr, __VA_ARGS__)
30 #define OUTPUT_ROWS_LIMIT 10240
31
/* Column used to order the rows of the report (chosen with -s/--sort). */
enum SORT {
	ALL = 0,	/* sum of the io, bytes and time counters */
	IO = 1,		/* number of I/O operations */
	BYTES = 2,	/* bytes transferred */
	TIME = 3,	/* accumulated time */
};
38
/* One entry read from /proc/diskstats: device numbers and device name. */
struct disk {
	int major, minor;
	char name[256];
};
44
/*
 * Minimal growable array of owned pointers.
 * nr is the number of slots in use, capacity the number of slots allocated
 * in elems.
 */
struct vector {
	size_t nr, capacity;
	void **elems;
};
50
grow_vector(struct vector * vector)51 int grow_vector(struct vector *vector) {
52 if (vector->nr >= vector->capacity) {
53 void **reallocated;
54
55 if (!vector->capacity)
56 vector->capacity = 1;
57 else
58 vector->capacity *= 2;
59
60 reallocated = libbpf_reallocarray(vector->elems, vector->capacity, sizeof(*vector->elems));
61 if (!reallocated)
62 return -1;
63
64 vector->elems = reallocated;
65 }
66
67 return 0;
68 }
69
/*
 * Release every element stored in @vector and then the element array.
 *
 * The vector is received by value, so the caller's copy still holds the
 * (now stale) pointers and counters after this returns.
 */
void free_vector(struct vector vector) {
	size_t remaining = vector.nr;

	/* free(NULL) is a no-op, so empty slots need no special handling. */
	while (remaining--)
		free(vector.elems[remaining]);

	free(vector.elems);
}
76
77 struct vector disks = {};
78
79 static volatile sig_atomic_t exiting = 0;
80
81 static bool clear_screen = true;
82 static int output_rows = 20;
83 static int sort_by = ALL;
84 static int interval = 1;
85 static int count = 99999999;
86 static bool verbose = false;
87
/* Strings picked up by argp for --version, bug reports and --help. */
const char *argp_program_version = "biotop 0.1";
const char *argp_program_bug_address =
	"https://github.com/iovisor/bcc/tree/master/libbpf-tools";
/* The tool reports block I/O (see the file header), not file reads/writes. */
const char argp_program_doc[] =
"Trace block I/O by process.\n"
"\n"
"USAGE: biotop [-h] [interval] [count]\n"
"\n"
"EXAMPLES:\n"
" biotop # block I/O top, refresh every 1s\n"
" biotop 5 10 # 5s summaries, 10 times\n";
99
/* Command-line options understood by parse_arg(); terminated by a zero entry. */
static const struct argp_option opts[] = {
	{ .name = "noclear", .key = 'C', .doc = "Don't clear the screen" },
	{ .name = "sort", .key = 's', .arg = "SORT",
	  .doc = "Sort columns, default all [all, io, bytes, time]" },
	{ .name = "rows", .key = 'r', .arg = "ROWS",
	  .doc = "Maximum rows to print, default 20" },
	{ .name = "verbose", .key = 'v', .doc = "Verbose debug output" },
	{ .key = 'h', .flags = OPTION_HIDDEN, .doc = "Show the full help" },
	{ 0 },
};
108
parse_arg(int key,char * arg,struct argp_state * state)109 static error_t parse_arg(int key, char *arg, struct argp_state *state)
110 {
111 long rows;
112 static int pos_args;
113
114 switch (key) {
115 case 'C':
116 clear_screen = false;
117 break;
118 case 's':
119 if (!strcmp(arg, "all")) {
120 sort_by = ALL;
121 } else if (!strcmp(arg, "io")) {
122 sort_by = IO;
123 } else if (!strcmp(arg, "bytes")) {
124 sort_by = BYTES;
125 } else if (!strcmp(arg, "time")) {
126 sort_by = TIME;
127 } else {
128 warn("invalid sort method: %s\n", arg);
129 argp_usage(state);
130 }
131 break;
132 case 'r':
133 errno = 0;
134 rows = strtol(arg, NULL, 10);
135 if (errno || rows <= 0) {
136 warn("invalid rows: %s\n", arg);
137 argp_usage(state);
138 }
139 output_rows = rows;
140 if (output_rows > OUTPUT_ROWS_LIMIT)
141 output_rows = OUTPUT_ROWS_LIMIT;
142 break;
143 case 'v':
144 verbose = true;
145 break;
146 case 'h':
147 argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
148 break;
149 case ARGP_KEY_ARG:
150 errno = 0;
151 if (pos_args == 0) {
152 interval = strtol(arg, NULL, 10);
153 if (errno || interval <= 0) {
154 warn("invalid interval\n");
155 argp_usage(state);
156 }
157 } else if (pos_args == 1) {
158 count = strtol(arg, NULL, 10);
159 if (errno || count <= 0) {
160 warn("invalid count\n");
161 argp_usage(state);
162 }
163 } else {
164 warn("unrecognized positional argument: %s\n", arg);
165 argp_usage(state);
166 }
167 pos_args++;
168 break;
169 default:
170 return ARGP_ERR_UNKNOWN;
171 }
172 return 0;
173 }
174
libbpf_print_fn(enum libbpf_print_level level,const char * format,va_list args)175 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
176 {
177 if (level == LIBBPF_DEBUG && !verbose)
178 return 0;
179 return vfprintf(stderr, format, args);
180 }
181
sig_int(int signo)182 static void sig_int(int signo)
183 {
184 exiting = 1;
185 }
186
187 struct data_t {
188 struct info_t key;
189 struct val_t value;
190 };
191
sort_column(const void * obj1,const void * obj2)192 static int sort_column(const void *obj1, const void *obj2)
193 {
194 struct data_t *d1 = (struct data_t *) obj1;
195 struct data_t *d2 = (struct data_t *) obj2;
196
197 struct val_t *s1 = &d1->value;
198 struct val_t *s2 = &d2->value;
199
200 if (sort_by == IO)
201 return s2->io - s1->io;
202 else if (sort_by == BYTES)
203 return s2->bytes - s1->bytes;
204 else if (sort_by == TIME)
205 return s2->us - s1->us;
206 else
207 return (s2->io + s2->bytes + s2->us)
208 - (s1->io + s1->bytes + s1->us);
209 }
210
parse_disk_stat(void)211 static void parse_disk_stat(void)
212 {
213 FILE *fp;
214 char *line;
215 size_t zero;
216
217 fp = fopen("/proc/diskstats", "r");
218 if (!fp)
219 return;
220
221 zero = 0;
222 while (getline(&line, &zero, fp) != -1) {
223 struct disk disk;
224
225 if (sscanf(line, "%d %d %s", &disk.major, &disk.minor, disk.name) != 3)
226 continue;
227
228 if (grow_vector(&disks) == -1)
229 goto err;
230
231 disks.elems[disks.nr] = malloc(sizeof(disk));
232 if (!disks.elems[disks.nr])
233 goto err;
234
235 memcpy(disks.elems[disks.nr], &disk, sizeof(disk));
236
237 disks.nr++;
238 }
239
240 free(line);
241 fclose(fp);
242
243 return;
244 err:
245 fprintf(stderr, "realloc or malloc failed\n");
246
247 free_vector(disks);
248 }
249
search_disk_name(int major,int minor)250 static char *search_disk_name(int major, int minor)
251 {
252 for (size_t i = 0; i < disks.nr; i++) {
253 struct disk *diskp;
254
255 if (!disks.elems[i])
256 continue;
257
258 diskp = (struct disk *) disks.elems[i];
259 if (diskp->major == major && diskp->minor == minor)
260 return diskp->name;
261 }
262
263 return "";
264 }
265
print_stat(struct biotop_bpf * obj)266 static int print_stat(struct biotop_bpf *obj)
267 {
268 FILE *f;
269 time_t t;
270 struct tm *tm;
271 char ts[16], buf[256];
272 struct info_t *prev_key = NULL;
273 static struct data_t datas[OUTPUT_ROWS_LIMIT];
274 int n, i, err = 0, rows = 0;
275 int fd = bpf_map__fd(obj->maps.counts);
276
277 f = fopen("/proc/loadavg", "r");
278 if (f) {
279 time(&t);
280 tm = localtime(&t);
281 strftime(ts, sizeof(ts), "%H:%M:%S", tm);
282 memset(buf, 0, sizeof(buf));
283 n = fread(buf, 1, sizeof(buf), f);
284 if (n)
285 printf("%8s loadavg: %s\n", ts, buf);
286 fclose(f);
287 }
288 printf("%-7s %-16s %1s %-3s %-3s %-8s %5s %7s %6s\n",
289 "PID", "COMM", "D", "MAJ", "MIN", "DISK", "I/O", "Kbytes", "AVGms");
290
291 while (1) {
292 err = bpf_map_get_next_key(fd, prev_key, &datas[rows].key);
293 if (err) {
294 if (errno == ENOENT) {
295 err = 0;
296 break;
297 }
298 warn("bpf_map_get_next_key failed: %s\n", strerror(errno));
299 return err;
300 }
301 err = bpf_map_lookup_elem(fd, &datas[rows].key, &datas[rows].value);
302 if (err) {
303 warn("bpf_map_lookup_elem failed: %s\n", strerror(errno));
304 return err;
305 }
306 prev_key = &datas[rows].key;
307 rows++;
308 }
309
310 qsort(datas, rows, sizeof(struct data_t), sort_column);
311 rows = rows < output_rows ? rows : output_rows;
312 for (i = 0; i < rows; i++) {
313 int major;
314 int minor;
315 struct info_t *key = &datas[i].key;
316 struct val_t *value = &datas[i].value;
317 float avg_ms = 0;
318
319 /* To avoid floating point exception. */
320 if (value->io)
321 avg_ms = ((float) value->us) / 1000 / value->io;
322
323 major = key->major;
324 minor = key->minor;
325
326 printf("%-7d %-16s %1s %-3d %-3d %-8s %5d %7lld %6.2f\n",
327 key->pid, key->name, key->rwflag ? "W": "R",
328 major, minor, search_disk_name(major, minor),
329 value->io, value->bytes / 1024, avg_ms);
330 }
331
332 printf("\n");
333 prev_key = NULL;
334
335 while (1) {
336 struct info_t key;
337
338 err = bpf_map_get_next_key(fd, prev_key, &key);
339 if (err) {
340 if (errno == ENOENT) {
341 err = 0;
342 break;
343 }
344 warn("bpf_map_get_next_key failed: %s\n", strerror(errno));
345 return err;
346 }
347 err = bpf_map_delete_elem(fd, &key);
348 if (err) {
349 warn("bpf_map_delete_elem failed: %s\n", strerror(errno));
350 return err;
351 }
352 prev_key = &key;
353 }
354 return err;
355 }
356
/*
 * Report whether both the block:block_io_start and block:block_io_done
 * tracepoints exist, as told by tracepoint_exists().
 */
static bool has_block_io_tracepoints(void)
{
	if (!tracepoint_exists("block", "block_io_start"))
		return false;

	return tracepoint_exists("block", "block_io_done");
}
362
disable_block_io_tracepoints(struct biotop_bpf * obj)363 static void disable_block_io_tracepoints(struct biotop_bpf *obj)
364 {
365 bpf_program__set_autoload(obj->progs.block_io_start, false);
366 bpf_program__set_autoload(obj->progs.block_io_done, false);
367 }
368
disable_blk_account_io_kprobes(struct biotop_bpf * obj)369 static void disable_blk_account_io_kprobes(struct biotop_bpf *obj)
370 {
371 bpf_program__set_autoload(obj->progs.blk_account_io_start, false);
372 bpf_program__set_autoload(obj->progs.blk_account_io_done, false);
373 bpf_program__set_autoload(obj->progs.__blk_account_io_start, false);
374 bpf_program__set_autoload(obj->progs.__blk_account_io_done, false);
375 }
376
blk_account_io_set_autoload(struct biotop_bpf * obj,struct ksyms * ksyms)377 static void blk_account_io_set_autoload(struct biotop_bpf *obj,
378 struct ksyms *ksyms)
379 {
380 if (!ksyms__get_symbol(ksyms, "__blk_account_io_start")) {
381 bpf_program__set_autoload(obj->progs.__blk_account_io_start, false);
382 bpf_program__set_autoload(obj->progs.__blk_account_io_done, false);
383 } else {
384 bpf_program__set_autoload(obj->progs.blk_account_io_start, false);
385 bpf_program__set_autoload(obj->progs.blk_account_io_done, false);
386 }
387 }
388
main(int argc,char ** argv)389 int main(int argc, char **argv)
390 {
391 static const struct argp argp = {
392 .options = opts,
393 .parser = parse_arg,
394 .doc = argp_program_doc,
395 };
396 struct biotop_bpf *obj;
397 struct ksyms *ksyms;
398 int err;
399
400 err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
401 if (err)
402 return err;
403
404 libbpf_set_print(libbpf_print_fn);
405
406 obj = biotop_bpf__open();
407 if (!obj) {
408 warn("failed to open BPF object\n");
409 return 1;
410 }
411
412 parse_disk_stat();
413
414 ksyms = ksyms__load();
415 if (!ksyms) {
416 err = -ENOMEM;
417 warn("failed to load kallsyms\n");
418 goto cleanup;
419 }
420
421 if (has_block_io_tracepoints())
422 disable_blk_account_io_kprobes(obj);
423 else {
424 disable_block_io_tracepoints(obj);
425 blk_account_io_set_autoload(obj, ksyms);
426 }
427
428 err = biotop_bpf__load(obj);
429 if (err) {
430 warn("failed to load BPF object: %d\n", err);
431 goto cleanup;
432 }
433
434 err = biotop_bpf__attach(obj);
435 if (err) {
436 warn("failed to attach BPF programs: %d\n", err);
437 goto cleanup;
438 }
439
440 if (signal(SIGINT, sig_int) == SIG_ERR) {
441 warn("can't set signal handler: %s\n", strerror(errno));
442 err = 1;
443 goto cleanup;
444 }
445
446 while (1) {
447 sleep(interval);
448
449 if (clear_screen) {
450 err = system("clear");
451 if (err)
452 goto cleanup;
453 }
454
455 err = print_stat(obj);
456 if (err)
457 goto cleanup;
458
459 count--;
460 if (exiting || !count)
461 goto cleanup;
462 }
463
464 cleanup:
465 ksyms__free(ksyms);
466 free_vector(disks);
467 biotop_bpf__destroy(obj);
468
469 return err != 0;
470 }
471