1 /*
2 * Copyright (C) 2021 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #pragma once
18
19 #include <android-base/unique_fd.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <linux/bpf.h>
23 #include <linux/unistd.h>
24 #include <sys/file.h>
25
26
27 namespace android {
28 namespace bpf {
29
30 using ::android::base::borrowed_fd;
31 using ::android::base::unique_fd;
32
// Converts a userspace pointer into the u64 form the kernel expects in bpf_attr
// pointer fields (zero-extended on 32-bit userspace).
inline uint64_t ptr_to_u64(const void* const x) {
    return static_cast<uint64_t>(reinterpret_cast<uintptr_t>(x));
}
36
/* Note: bpf_attr is a union which might have a much larger size than the anonymous struct portion
 * of it that we are using. The kernel's bpf() system call will perform a strict check to ensure
 * all unused portions are zero. It will fail with E2BIG if we don't fully zero bpf_attr.
 */
41
// Invokes the bpf() syscall for commands that treat 'attr' as read-only input.
// Always passes the full sizeof(bpf_attr); per the note above, the caller must
// have zero-initialized every byte it does not explicitly set.
inline int bpf(enum bpf_cmd cmd, const bpf_attr& attr) {
    return syscall(__NR_bpf, cmd, &attr, sizeof(bpf_attr));
}
45
46 // this version is meant for use with cmd's which mutate the argument
// Overload for commands that write results back into '*attr' (eg. BPF_PROG_QUERY).
inline int bpf(enum bpf_cmd cmd, bpf_attr* attr) {
    return syscall(__NR_bpf, cmd, attr, sizeof(bpf_attr));
}
50
createMap(bpf_map_type map_type,uint32_t key_size,uint32_t value_size,uint32_t max_entries,uint32_t map_flags)51 inline int createMap(bpf_map_type map_type, uint32_t key_size, uint32_t value_size,
52 uint32_t max_entries, uint32_t map_flags) {
53 return bpf(BPF_MAP_CREATE, {
54 .map_type = map_type,
55 .key_size = key_size,
56 .value_size = value_size,
57 .max_entries = max_entries,
58 .map_flags = map_flags,
59 });
60 }
61
62 // Note:
63 // 'map_type' must be one of BPF_MAP_TYPE_{ARRAY,HASH}_OF_MAPS
64 // 'value_size' must be sizeof(u32), ie. 4
65 // 'inner_map_fd' is basically a template specifying {map_type, key_size, value_size, max_entries, map_flags}
66 // of the inner map type (and possibly only key_size/value_size actually matter?).
createOuterMap(bpf_map_type map_type,uint32_t key_size,uint32_t value_size,uint32_t max_entries,uint32_t map_flags,const borrowed_fd & inner_map_fd)67 inline int createOuterMap(bpf_map_type map_type, uint32_t key_size, uint32_t value_size,
68 uint32_t max_entries, uint32_t map_flags,
69 const borrowed_fd& inner_map_fd) {
70 return bpf(BPF_MAP_CREATE, {
71 .map_type = map_type,
72 .key_size = key_size,
73 .value_size = value_size,
74 .max_entries = max_entries,
75 .map_flags = map_flags,
76 .inner_map_fd = static_cast<__u32>(inner_map_fd.get()),
77 });
78 }
79
writeToMapEntry(const borrowed_fd & map_fd,const void * key,const void * value,uint64_t flags)80 inline int writeToMapEntry(const borrowed_fd& map_fd, const void* key, const void* value,
81 uint64_t flags) {
82 return bpf(BPF_MAP_UPDATE_ELEM, {
83 .map_fd = static_cast<__u32>(map_fd.get()),
84 .key = ptr_to_u64(key),
85 .value = ptr_to_u64(value),
86 .flags = flags,
87 });
88 }
89
findMapEntry(const borrowed_fd & map_fd,const void * key,void * value)90 inline int findMapEntry(const borrowed_fd& map_fd, const void* key, void* value) {
91 return bpf(BPF_MAP_LOOKUP_ELEM, {
92 .map_fd = static_cast<__u32>(map_fd.get()),
93 .key = ptr_to_u64(key),
94 .value = ptr_to_u64(value),
95 });
96 }
97
deleteMapEntry(const borrowed_fd & map_fd,const void * key)98 inline int deleteMapEntry(const borrowed_fd& map_fd, const void* key) {
99 return bpf(BPF_MAP_DELETE_ELEM, {
100 .map_fd = static_cast<__u32>(map_fd.get()),
101 .key = ptr_to_u64(key),
102 });
103 }
104
getNextMapKey(const borrowed_fd & map_fd,const void * key,void * next_key)105 inline int getNextMapKey(const borrowed_fd& map_fd, const void* key, void* next_key) {
106 return bpf(BPF_MAP_GET_NEXT_KEY, {
107 .map_fd = static_cast<__u32>(map_fd.get()),
108 .key = ptr_to_u64(key),
109 .next_key = ptr_to_u64(next_key),
110 });
111 }
112
getFirstMapKey(const borrowed_fd & map_fd,void * firstKey)113 inline int getFirstMapKey(const borrowed_fd& map_fd, void* firstKey) {
114 return getNextMapKey(map_fd, NULL, firstKey);
115 }
116
bpfFdPin(const borrowed_fd & map_fd,const char * pathname)117 inline int bpfFdPin(const borrowed_fd& map_fd, const char* pathname) {
118 return bpf(BPF_OBJ_PIN, {
119 .pathname = ptr_to_u64(pathname),
120 .bpf_fd = static_cast<__u32>(map_fd.get()),
121 });
122 }
123
bpfFdGet(const char * pathname,uint32_t flag)124 inline int bpfFdGet(const char* pathname, uint32_t flag) {
125 return bpf(BPF_OBJ_GET, {
126 .pathname = ptr_to_u64(pathname),
127 .file_flags = flag,
128 });
129 }
130
// Defined below via DEFINE_BPF_GET_FD; forward declared here for use by bpfLock().
int bpfGetFdMapId(const borrowed_fd& map_fd);

// Places an open-file-description (F_OFD_SETLK) advisory lock of the given
// 'type' (F_RDLCK shared / F_WRLCK exclusive) on the map fd, so that
// cooperating processes can coordinate shared vs exclusive access to a map.
// The locked byte range starts at the map's id, so the lock is effectively
// keyed on the map itself rather than the fd — TODO confirm all lockers
// resolve the same underlying file for this to exclude across processes.
// Returns 'fd' on success; on lock failure closes 'fd' and returns the
// (negative) fcntl result. A negative input fd is returned unchanged.
inline int bpfLock(int fd, short type) {
    if (fd < 0) return fd; // pass any errors straight through
#ifdef BPF_MAP_LOCKLESS_FOR_TEST
    // Test builds skip locking entirely.
    return fd;
#endif
    int mapId = bpfGetFdMapId(fd);
    // Snapshot errno immediately: the comparison below must see the errno set
    // by bpfGetFdMapId(), not one clobbered by any intervening call.
    int saved_errno = errno;
    // 4.14+ required to fetch map id, but we don't want to call isAtLeastKernelVersion
    if (mapId == -1 && saved_errno == EINVAL) return fd;
    if (mapId <= 0) abort(); // should not be possible

    // on __LP64__ (aka. 64-bit userspace) 'struct flock64' is the same as 'struct flock'
    struct flock64 fl = {
        .l_type = type,        // short: F_{RD,WR,UN}LCK
        .l_whence = SEEK_SET,  // short: SEEK_{SET,CUR,END}
        .l_start = mapId,      // off_t: start offset
        .l_len = 1,            // off_t: number of bytes
    };

    // see: bionic/libc/bionic/fcntl.cpp: iff !__LP64__ this uses fcntl64
    int ret = fcntl(fd, F_OFD_SETLK, &fl);
    if (!ret) return fd; // success
    close(fd);
    return ret; // most likely -1 with errno == EAGAIN, due to already held lock
}
158
mapRetrieveLocklessRW(const char * pathname)159 inline int mapRetrieveLocklessRW(const char* pathname) {
160 return bpfFdGet(pathname, 0);
161 }
162
mapRetrieveExclusiveRW(const char * pathname)163 inline int mapRetrieveExclusiveRW(const char* pathname) {
164 return bpfLock(mapRetrieveLocklessRW(pathname), F_WRLCK);
165 }
166
mapRetrieveRW(const char * pathname)167 inline int mapRetrieveRW(const char* pathname) {
168 return bpfLock(mapRetrieveLocklessRW(pathname), F_RDLCK);
169 }
170
mapRetrieveRO(const char * pathname)171 inline int mapRetrieveRO(const char* pathname) {
172 return bpfFdGet(pathname, BPF_F_RDONLY);
173 }
174
175 // WARNING: it's impossible to grab a shared (ie. read) lock on a write-only fd,
176 // so we instead choose to grab an exclusive (ie. write) lock.
mapRetrieveWO(const char * pathname)177 inline int mapRetrieveWO(const char* pathname) {
178 return bpfLock(bpfFdGet(pathname, BPF_F_WRONLY), F_WRLCK);
179 }
180
retrieveProgram(const char * pathname)181 inline int retrieveProgram(const char* pathname) {
182 return bpfFdGet(pathname, BPF_F_RDONLY);
183 }
184
usableProgram(const char * pathname)185 inline bool usableProgram(const char* pathname) {
186 unique_fd fd(retrieveProgram(pathname));
187 return fd.ok();
188 }
189
190 inline int attachProgram(bpf_attach_type type, const borrowed_fd& prog_fd,
191 const borrowed_fd& cg_fd, uint32_t flags = 0) {
192 return bpf(BPF_PROG_ATTACH, {
193 .target_fd = static_cast<__u32>(cg_fd.get()),
194 .attach_bpf_fd = static_cast<__u32>(prog_fd.get()),
195 .attach_type = type,
196 .attach_flags = flags,
197 });
198 }
199
detachProgram(bpf_attach_type type,const borrowed_fd & cg_fd)200 inline int detachProgram(bpf_attach_type type, const borrowed_fd& cg_fd) {
201 return bpf(BPF_PROG_DETACH, {
202 .target_fd = static_cast<__u32>(cg_fd.get()),
203 .attach_type = type,
204 });
205 }
206
// Queries which bpf program (if any) is attached to cgroup 'cg_fd' for
// 'attach_type'. Returns the attached program's id (> 0), 0 when nothing is
// attached, or a negative value with errno set on syscall failure. Only one
// id slot is provided (prog_cnt == 1), so with multi-attach only the first
// program's id is reported.
inline int queryProgram(const borrowed_fd& cg_fd,
                        enum bpf_attach_type attach_type,
                        __u32 query_flags = 0,
                        __u32 attach_flags = 0) {
    int prog_id = -1; // equivalent to an array of one integer.
    bpf_attr arg = {
            .query = {
                    .target_fd = static_cast<__u32>(cg_fd.get()),
                    .attach_type = attach_type,
                    .query_flags = query_flags,
                    .attach_flags = attach_flags,
                    .prog_ids = ptr_to_u64(&prog_id), // pointer to output array
                    .prog_cnt = 1, // in: space - nr of ints in the array, out: used
            }
    };
    // Mutating overload: the kernel writes prog_cnt (and the id array) back.
    int v = bpf(BPF_PROG_QUERY, &arg);
    if (v) return v; // error case
    if (!arg.query.prog_cnt) return 0; // no program, kernel never returns zero id
    return prog_id; // return actual id
}
227
detachSingleProgram(bpf_attach_type type,const borrowed_fd & prog_fd,const borrowed_fd & cg_fd)228 inline int detachSingleProgram(bpf_attach_type type, const borrowed_fd& prog_fd,
229 const borrowed_fd& cg_fd) {
230 return bpf(BPF_PROG_DETACH, {
231 .target_fd = static_cast<__u32>(cg_fd.get()),
232 .attach_bpf_fd = static_cast<__u32>(prog_fd.get()),
233 .attach_type = type,
234 });
235 }
236
237 // Available in 4.12 and later kernels.
runProgram(const borrowed_fd & prog_fd,const void * data,const uint32_t data_size)238 inline int runProgram(const borrowed_fd& prog_fd, const void* data,
239 const uint32_t data_size) {
240 return bpf(BPF_PROG_RUN, {
241 .test = {
242 .prog_fd = static_cast<__u32>(prog_fd.get()),
243 .data_size_in = data_size,
244 .data_in = ptr_to_u64(data),
245 },
246 });
247 }
248
249 // BPF_OBJ_GET_INFO_BY_FD requires 4.14+ kernel
250 //
251 // Note: some fields are only defined in newer kernels (ie. the map_info struct grows
252 // over time), so we need to check that the field we're interested in is actually
253 // supported/returned by the running kernel. We do this by checking it is fully
254 // within the bounds of the struct size as reported by the kernel.
// Expands to an accessor 'inline int bpfGetFd<NAME>(fd)' which fetches the
// kernel's bpf_<TYPE>_info for the fd (via BPF_OBJ_GET_INFO_BY_FD) and returns
// its FIELD member. Returns -1 with errno = EOPNOTSUPP when the running
// kernel's info struct is too short to contain FIELD (ie. kernel too old);
// other failures return the raw bpf() result with errno already set.
#define DEFINE_BPF_GET_FD(TYPE, NAME, FIELD) \
inline int bpfGetFd ## NAME(const borrowed_fd& fd) { \
    struct bpf_ ## TYPE ## _info info = {}; \
    union bpf_attr attr = { .info = { \
        .bpf_fd = static_cast<__u32>(fd.get()), \
        .info_len = sizeof(info), \
        .info = ptr_to_u64(&info), \
    }}; \
    int rv = bpf(BPF_OBJ_GET_INFO_BY_FD, attr); \
    if (rv) return rv; \
    if (attr.info.info_len < offsetof(bpf_ ## TYPE ## _info, FIELD) + sizeof(info.FIELD)) { \
        errno = EOPNOTSUPP; \
        return -1; \
    }; \
    return info.FIELD; \
}

// All 7 of these fields are already present in Linux v4.14 (even ACK 4.14-P)
// while BPF_OBJ_GET_INFO_BY_FD is not implemented at all in v4.9 (even ACK 4.9-Q)
DEFINE_BPF_GET_FD(map, MapType, type)            // int bpfGetFdMapType(const borrowed_fd& map_fd)
DEFINE_BPF_GET_FD(map, MapId, id)                // int bpfGetFdMapId(const borrowed_fd& map_fd)
DEFINE_BPF_GET_FD(map, KeySize, key_size)        // int bpfGetFdKeySize(const borrowed_fd& map_fd)
DEFINE_BPF_GET_FD(map, ValueSize, value_size)    // int bpfGetFdValueSize(const borrowed_fd& map_fd)
DEFINE_BPF_GET_FD(map, MaxEntries, max_entries)  // int bpfGetFdMaxEntries(const borrowed_fd& map_fd)
DEFINE_BPF_GET_FD(map, MapFlags, map_flags)      // int bpfGetFdMapFlags(const borrowed_fd& map_fd)
DEFINE_BPF_GET_FD(prog, ProgId, id)              // int bpfGetFdProgId(const borrowed_fd& prog_fd)

// The macro is local to this header; prevent leakage into including files.
#undef DEFINE_BPF_GET_FD
283
284 } // namespace bpf
285 } // namespace android
286
287