/*
 * Copyright (C) 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include <android-base/unique_fd.h>
#include <stdlib.h>
#include <unistd.h>
#include <linux/bpf.h>
#include <linux/unistd.h>
#include <sys/file.h>


namespace android {
namespace bpf {

using ::android::base::borrowed_fd;
using ::android::base::unique_fd;

inline uint64_t ptr_to_u64(const void * const x) {
    return (uint64_t)(uintptr_t)x;
}

/* Note: bpf_attr is a union which might have a much larger size than the anonymous struct portion
 * of it that we are using.  The kernel's bpf() system call will perform a strict check to ensure
 * all unused portions are zero.  It will fail with E2BIG if we don't fully zero bpf_attr.
 */

inline int bpf(enum bpf_cmd cmd, const bpf_attr& attr) {
    return syscall(__NR_bpf, cmd, &attr, sizeof(attr));
}

// this version is meant for use with cmds which mutate the argument
inline int bpf(enum bpf_cmd cmd, bpf_attr *attr) {
    return syscall(__NR_bpf, cmd, attr, sizeof(*attr));
}

inline int createMap(bpf_map_type map_type, uint32_t key_size, uint32_t value_size,
                     uint32_t max_entries, uint32_t map_flags) {
    return bpf(BPF_MAP_CREATE, {
                                       .map_type = map_type,
                                       .key_size = key_size,
                                       .value_size = value_size,
                                       .max_entries = max_entries,
                                       .map_flags = map_flags,
                               });
}
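
// Illustrative usage (a sketch, not something this header itself requires): create a
// small hash map keyed and valued by uint32_t.  The names below are hypothetical.
//
//   unique_fd map_fd(createMap(BPF_MAP_TYPE_HASH, sizeof(uint32_t), sizeof(uint32_t),
//                              /*max_entries=*/64, /*map_flags=*/0));
//   if (!map_fd.ok()) { /* BPF_MAP_CREATE failed, errno holds the reason */ }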

// Note:
//   'map_type' must be one of BPF_MAP_TYPE_{ARRAY,HASH}_OF_MAPS
//   'value_size' must be sizeof(u32), ie. 4
//   'inner_map_fd' is basically a template specifying {map_type, key_size, value_size, max_entries, map_flags}
//   of the inner map type (and possibly only key_size/value_size actually matter?).
inline int createOuterMap(bpf_map_type map_type, uint32_t key_size, uint32_t value_size,
                          uint32_t max_entries, uint32_t map_flags,
                          const borrowed_fd& inner_map_fd) {
    return bpf(BPF_MAP_CREATE, {
                                       .map_type = map_type,
                                       .key_size = key_size,
                                       .value_size = value_size,
                                       .max_entries = max_entries,
                                       .map_flags = map_flags,
                                       .inner_map_fd = static_cast<__u32>(inner_map_fd.get()),
                               });
}
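
// Illustrative usage (a sketch, hypothetical names): create the inner map first and
// pass it as the template fd.  Per the note above it only serves as a template, so
// the inner map fd need not outlive this call.
//
//   unique_fd inner(createMap(BPF_MAP_TYPE_HASH, 4, 4, 16, 0));
//   unique_fd outer(createOuterMap(BPF_MAP_TYPE_HASH_OF_MAPS, /*key_size=*/4,
//                                  /*value_size=*/4, /*max_entries=*/8,
//                                  /*map_flags=*/0, inner));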

inline int writeToMapEntry(const borrowed_fd& map_fd, const void* key, const void* value,
                           uint64_t flags) {
    return bpf(BPF_MAP_UPDATE_ELEM, {
                                            .map_fd = static_cast<__u32>(map_fd.get()),
                                            .key = ptr_to_u64(key),
                                            .value = ptr_to_u64(value),
                                            .flags = flags,
                                    });
}

inline int findMapEntry(const borrowed_fd& map_fd, const void* key, void* value) {
    return bpf(BPF_MAP_LOOKUP_ELEM, {
                                            .map_fd = static_cast<__u32>(map_fd.get()),
                                            .key = ptr_to_u64(key),
                                            .value = ptr_to_u64(value),
                                    });
}

inline int deleteMapEntry(const borrowed_fd& map_fd, const void* key) {
    return bpf(BPF_MAP_DELETE_ELEM, {
                                            .map_fd = static_cast<__u32>(map_fd.get()),
                                            .key = ptr_to_u64(key),
                                    });
}

inline int getNextMapKey(const borrowed_fd& map_fd, const void* key, void* next_key) {
    return bpf(BPF_MAP_GET_NEXT_KEY, {
                                             .map_fd = static_cast<__u32>(map_fd.get()),
                                             .key = ptr_to_u64(key),
                                             .next_key = ptr_to_u64(next_key),
                                     });
}

inline int getFirstMapKey(const borrowed_fd& map_fd, void* firstKey) {
    return getNextMapKey(map_fd, NULL, firstKey);
}
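
// Illustrative iteration sketch (hypothetical names, assuming a map with uint32_t
// keys and values): start from getFirstMapKey() and keep calling getNextMapKey()
// until it fails, typically with errno == ENOENT once the last key has been returned.
//
//   uint32_t key, value;
//   for (int rv = getFirstMapKey(map_fd, &key); !rv; rv = getNextMapKey(map_fd, &key, &key)) {
//       if (findMapEntry(map_fd, &key, &value)) break;  // lookup failed
//       // ... use key/value ...
//   }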

inline int bpfFdPin(const borrowed_fd& map_fd, const char* pathname) {
    return bpf(BPF_OBJ_PIN, {
                                    .pathname = ptr_to_u64(pathname),
                                    .bpf_fd = static_cast<__u32>(map_fd.get()),
                            });
}

inline int bpfFdGet(const char* pathname, uint32_t flag) {
    return bpf(BPF_OBJ_GET, {
                                    .pathname = ptr_to_u64(pathname),
                                    .file_flags = flag,
                            });
}

int bpfGetFdMapId(const borrowed_fd& map_fd);

inline int bpfLock(int fd, short type) {
    if (fd < 0) return fd;  // pass any errors straight through
#ifdef BPF_MAP_LOCKLESS_FOR_TEST
    return fd;
#endif
    int mapId = bpfGetFdMapId(fd);
    int saved_errno = errno;
    // 4.14+ required to fetch map id, but we don't want to call isAtLeastKernelVersion
    if (mapId == -1 && saved_errno == EINVAL) return fd;
    if (mapId <= 0) abort();  // should not be possible

    // on __LP64__ (aka. 64-bit userspace) 'struct flock64' is the same as 'struct flock'
    struct flock64 fl = {
        .l_type = type,        // short: F_{RD,WR,UN}LCK
        .l_whence = SEEK_SET,  // short: SEEK_{SET,CUR,END}
        .l_start = mapId,      // off_t: start offset
        .l_len = 1,            // off_t: number of bytes
    };

    // see: bionic/libc/bionic/fcntl.cpp: iff !__LP64__ this uses fcntl64
    int ret = fcntl(fd, F_OFD_SETLK, &fl);
    if (!ret) return fd;  // success
    close(fd);
    return ret;  // most likely -1 with errno == EAGAIN, due to already held lock
}

inline int mapRetrieveLocklessRW(const char* pathname) {
    return bpfFdGet(pathname, 0);
}

inline int mapRetrieveExclusiveRW(const char* pathname) {
    return bpfLock(mapRetrieveLocklessRW(pathname), F_WRLCK);
}

inline int mapRetrieveRW(const char* pathname) {
    return bpfLock(mapRetrieveLocklessRW(pathname), F_RDLCK);
}

inline int mapRetrieveRO(const char* pathname) {
    return bpfFdGet(pathname, BPF_F_RDONLY);
}

// WARNING: it's impossible to grab a shared (ie. read) lock on a write-only fd,
// so we instead choose to grab an exclusive (ie. write) lock.
inline int mapRetrieveWO(const char* pathname) {
    return bpfLock(bpfFdGet(pathname, BPF_F_WRONLY), F_WRLCK);
}
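
// Illustrative choice between the retrieval flavours above (a sketch; the pinned
// path is hypothetical): mapRetrieveRW() takes a shared lock, so multiple such
// holders can coexist, while mapRetrieveExclusiveRW() fails (typically with
// errno == EAGAIN) if any other lock is already held on the map.
//
//   unique_fd fd(mapRetrieveExclusiveRW("/sys/fs/bpf/map_example_some_map"));
//   if (!fd.ok()) { /* likely EAGAIN: another process holds the map locked */ }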

inline int retrieveProgram(const char* pathname) {
    return bpfFdGet(pathname, BPF_F_RDONLY);
}

inline bool usableProgram(const char* pathname) {
    unique_fd fd(retrieveProgram(pathname));
    return fd.ok();
}

inline int attachProgram(bpf_attach_type type, const borrowed_fd& prog_fd,
                         const borrowed_fd& cg_fd, uint32_t flags = 0) {
    return bpf(BPF_PROG_ATTACH, {
                                        .target_fd = static_cast<__u32>(cg_fd.get()),
                                        .attach_bpf_fd = static_cast<__u32>(prog_fd.get()),
                                        .attach_type = type,
                                        .attach_flags = flags,
                                });
}

inline int detachProgram(bpf_attach_type type, const borrowed_fd& cg_fd) {
    return bpf(BPF_PROG_DETACH, {
                                        .target_fd = static_cast<__u32>(cg_fd.get()),
                                        .attach_type = type,
                                });
}

inline int queryProgram(const borrowed_fd& cg_fd,
                        enum bpf_attach_type attach_type,
                        __u32 query_flags = 0,
                        __u32 attach_flags = 0) {
    int prog_id = -1;  // equivalent to an array of one integer.
    bpf_attr arg = {
            .query = {
                    .target_fd = static_cast<__u32>(cg_fd.get()),
                    .attach_type = attach_type,
                    .query_flags = query_flags,
                    .attach_flags = attach_flags,
                    .prog_ids = ptr_to_u64(&prog_id),  // pointer to output array
                    .prog_cnt = 1,  // in: space - nr of ints in the array, out: used
            }
    };
    int v = bpf(BPF_PROG_QUERY, &arg);
    if (v) return v;  // error case
    if (!arg.query.prog_cnt) return 0;  // no program, kernel never returns zero id
    return prog_id;  // return actual id
}
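
// Illustrative usage (a sketch, hypothetical names): a positive return value is the
// id of an attached program, 0 means nothing is attached at that attach point, and
// a negative return (with errno set) means the query itself failed.
//
//   int id = queryProgram(cg_fd, BPF_CGROUP_INET_INGRESS);
//   if (id > 0) { /* a program is attached to the cgroup's ingress hook */ }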

inline int detachSingleProgram(bpf_attach_type type, const borrowed_fd& prog_fd,
                               const borrowed_fd& cg_fd) {
    return bpf(BPF_PROG_DETACH, {
                                        .target_fd = static_cast<__u32>(cg_fd.get()),
                                        .attach_bpf_fd = static_cast<__u32>(prog_fd.get()),
                                        .attach_type = type,
                                });
}

// Available in 4.12 and later kernels.
inline int runProgram(const borrowed_fd& prog_fd, const void* data,
                      const uint32_t data_size) {
    return bpf(BPF_PROG_RUN, {
                                     .test = {
                                             .prog_fd = static_cast<__u32>(prog_fd.get()),
                                             .data_size_in = data_size,
                                             .data_in = ptr_to_u64(data),
                                     },
                             });
}
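
// Illustrative usage (a sketch, hypothetical names): this minimal wrapper only
// reports whether the test run itself succeeded; the program's own return value
// would be delivered in the (unread) attr.test.retval field.
//
//   uint8_t pkt[64] = {};  // dummy packet-sized input buffer
//   int ok = runProgram(prog_fd, pkt, sizeof(pkt));  // 0 on success, -1 + errno on error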

// BPF_OBJ_GET_INFO_BY_FD requires 4.14+ kernel
//
// Note: some fields are only defined in newer kernels (ie. the map_info struct grows
// over time), so we need to check that the field we're interested in is actually
// supported/returned by the running kernel.  We do this by checking it is fully
// within the bounds of the struct size as reported by the kernel.
#define DEFINE_BPF_GET_FD(TYPE, NAME, FIELD) \
inline int bpfGetFd ## NAME(const borrowed_fd& fd) { \
    struct bpf_ ## TYPE ## _info info = {}; \
    union bpf_attr attr = { .info = { \
        .bpf_fd = static_cast<__u32>(fd.get()), \
        .info_len = sizeof(info), \
        .info = ptr_to_u64(&info), \
    }}; \
    int rv = bpf(BPF_OBJ_GET_INFO_BY_FD, attr); \
    if (rv) return rv; \
    if (attr.info.info_len < offsetof(bpf_ ## TYPE ## _info, FIELD) + sizeof(info.FIELD)) { \
        errno = EOPNOTSUPP; \
        return -1; \
    }; \
    return info.FIELD; \
}

// All 7 of these fields are already present in Linux v4.14 (even ACK 4.14-P)
// while BPF_OBJ_GET_INFO_BY_FD is not implemented at all in v4.9 (even ACK 4.9-Q)
DEFINE_BPF_GET_FD(map, MapType, type)            // int bpfGetFdMapType(const borrowed_fd& map_fd)
DEFINE_BPF_GET_FD(map, MapId, id)                // int bpfGetFdMapId(const borrowed_fd& map_fd)
DEFINE_BPF_GET_FD(map, KeySize, key_size)        // int bpfGetFdKeySize(const borrowed_fd& map_fd)
DEFINE_BPF_GET_FD(map, ValueSize, value_size)    // int bpfGetFdValueSize(const borrowed_fd& map_fd)
DEFINE_BPF_GET_FD(map, MaxEntries, max_entries)  // int bpfGetFdMaxEntries(const borrowed_fd& map_fd)
DEFINE_BPF_GET_FD(map, MapFlags, map_flags)      // int bpfGetFdMapFlags(const borrowed_fd& map_fd)
DEFINE_BPF_GET_FD(prog, ProgId, id)              // int bpfGetFdProgId(const borrowed_fd& prog_fd)

#undef DEFINE_BPF_GET_FD
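
// Illustrative usage (a sketch; KeyType/ValueType and the pinned path are hypothetical):
// the generated getters make it easy to sanity check a pinned map against compile
// time expectations before using it.
//
//   unique_fd fd(mapRetrieveRO("/sys/fs/bpf/map_example_some_map"));
//   if (bpfGetFdKeySize(fd) != (int)sizeof(KeyType) ||
//       bpfGetFdValueSize(fd) != (int)sizeof(ValueType)) {
//       // wrong map layout - refuse to use it
//   }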

}  // namespace bpf
}  // namespace android