// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2020 Wenbo Zhang
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_tracing.h>
#include "biosnoop.h"
#include "core_fixes.bpf.h"

#define MAX_ENTRIES	10240

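/* Runtime filter knobs; the userspace loader sets these before the object is loaded. */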
const volatile bool filter_cg = false;
const volatile bool targ_queued = false;
const volatile bool filter_dev = false;
const volatile __u32 targ_dev = 0;
const volatile __u64 min_ns = 0;

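/* Resolved by libbpf at load time from the running kernel. */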
extern __u32 LINUX_KERNEL_VERSION __kconfig;

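/*
 * Single-slot cgroup array; when filter_cg is set, userspace is expected to
 * store the target cgroup fd at index 0 so tracing is restricted to tasks
 * in that cgroup.
 */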
struct {
	__uint(type, BPF_MAP_TYPE_CGROUP_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, 1);
} cgroup_map SEC(".maps");

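/*
 * PID and comm of the task that started the I/O, recorded in infobyreq
 * keyed by the request pointer.
 */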
struct piddata {
	char comm[TASK_COMM_LEN];
	u32 pid;
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, MAX_ENTRIES);
	__type(key, struct request *);
	__type(value, struct piddata);
} infobyreq SEC(".maps");

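/*
 * Per-request timestamps for the insert and issue stages, plus the resolved
 * device number; stored in the start map keyed by the request pointer.
 */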
struct stage {
	u64 insert;
	u64 issue;
	__u32 dev;
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, MAX_ENTRIES);
	__type(key, struct request *);
	__type(value, struct stage);
} start SEC(".maps");

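/* Per-CPU perf buffer used to push completed-I/O events to userspace. */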
struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(u32));
} events SEC(".maps");

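/* Remember the current task's PID and comm for this request. */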
static __always_inline
int trace_pid(struct request *rq)
{
	u64 id = bpf_get_current_pid_tgid();
	struct piddata piddata = {};

	piddata.pid = id >> 32;
	bpf_get_current_comm(&piddata.comm, sizeof(piddata.comm));
	bpf_map_update_elem(&infobyreq, &rq, &piddata, 0);
	return 0;
}

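/*
 * The issuing task's info can be recorded from several hooks; which ones
 * are attached depends on the kernel version (the userspace loader is
 * expected to enable only the programs that exist on the running kernel).
 */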
70 SEC("fentry/blk_account_io_start")
BPF_PROG(blk_account_io_start,struct request * rq)71 int BPF_PROG(blk_account_io_start, struct request *rq)
72 {
73 if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
74 return 0;
75
76 return trace_pid(rq);
77 }
78
79 SEC("tp_btf/block_io_start")
BPF_PROG(block_io_start,struct request * rq)80 int BPF_PROG(block_io_start, struct request *rq)
81 {
82 if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
83 return 0;
84
85 return trace_pid(rq);
86 }
87
88 SEC("kprobe/blk_account_io_merge_bio")
BPF_KPROBE(blk_account_io_merge_bio,struct request * rq)89 int BPF_KPROBE(blk_account_io_merge_bio, struct request *rq)
90 {
91 if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
92 return 0;
93
94 return trace_pid(rq);
95 }
96
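/*
 * Record the insert or issue timestamp for a request. On first sight the
 * device number is resolved from the request's gendisk and the optional
 * device filter is applied.
 */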
static __always_inline
int trace_rq_start(struct request *rq, bool insert)
{
	struct stage *stagep, stage = {};
	u64 ts = bpf_ktime_get_ns();

	stagep = bpf_map_lookup_elem(&start, &rq);
	if (!stagep) {
		struct gendisk *disk = get_disk(rq);

		stage.dev = disk ? MKDEV(BPF_CORE_READ(disk, major),
					BPF_CORE_READ(disk, first_minor)) : 0;
		if (filter_dev && targ_dev != stage.dev)
			return 0;
		stagep = &stage;
	}
	if (insert)
		stagep->insert = ts;
	else
		stagep->issue = ts;
	if (stagep == &stage)
		bpf_map_update_elem(&start, &rq, stagep, 0);
	return 0;
}

SEC("tp_btf/block_rq_insert")
int BPF_PROG(block_rq_insert)
{
	if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
		return 0;

	/**
	 * commit a54895fa (v5.11-rc1) changed tracepoint argument list
	 * from TP_PROTO(struct request_queue *q, struct request *rq)
	 * to TP_PROTO(struct request *rq)
	 */
	if (LINUX_KERNEL_VERSION >= KERNEL_VERSION(5, 11, 0))
		return trace_rq_start((void *)ctx[0], true);
	else
		return trace_rq_start((void *)ctx[1], true);
}

SEC("tp_btf/block_rq_issue")
int BPF_PROG(block_rq_issue)
{
	if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
		return 0;

	/**
	 * commit a54895fa (v5.11-rc1) changed tracepoint argument list
	 * from TP_PROTO(struct request_queue *q, struct request *rq)
	 * to TP_PROTO(struct request *rq)
	 */
	if (LINUX_KERNEL_VERSION >= KERNEL_VERSION(5, 11, 0))
		return trace_rq_start((void *)ctx[0], false);
	else
		return trace_rq_start((void *)ctx[1], false);
}

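/*
 * On completion, compute the issue-to-complete latency (and, if requested,
 * the insert-to-issue queue delay), fill in the event, push it through the
 * perf buffer, then drop both map entries for the request.
 */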
SEC("tp_btf/block_rq_complete")
int BPF_PROG(block_rq_complete, struct request *rq, int error,
	     unsigned int nr_bytes)
{
	if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
		return 0;

	u64 ts = bpf_ktime_get_ns();
	struct piddata *piddatap;
	struct event event = {};
	struct stage *stagep;
	s64 delta;

	stagep = bpf_map_lookup_elem(&start, &rq);
	if (!stagep)
		return 0;
	delta = (s64)(ts - stagep->issue);
	if (delta < 0 || delta < min_ns)
		goto cleanup;
	piddatap = bpf_map_lookup_elem(&infobyreq, &rq);
	if (!piddatap) {
		event.comm[0] = '?';
	} else {
		__builtin_memcpy(&event.comm, piddatap->comm,
				sizeof(event.comm));
		event.pid = piddatap->pid;
	}
	event.delta = delta;
	if (targ_queued && BPF_CORE_READ(rq, q, elevator)) {
		if (!stagep->insert)
			event.qdelta = -1;	/* insert stage missed or not recorded */
		else
			event.qdelta = stagep->issue - stagep->insert;
	}
	event.ts = ts;
	event.sector = BPF_CORE_READ(rq, __sector);
	event.len = BPF_CORE_READ(rq, __data_len);
	event.cmd_flags = BPF_CORE_READ(rq, cmd_flags);
	event.dev = stagep->dev;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event,
			sizeof(event));

cleanup:
	bpf_map_delete_elem(&start, &rq);
	bpf_map_delete_elem(&infobyreq, &rq);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";