1 /*
2 * Copyright (C) 2024 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "UprobeStatsBpfLoad"
18
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <linux/bpf.h>
22 #include <linux/elf.h>
23 #include <log/log.h>
24 #include <stdint.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <sys/stat.h>
29 #include <sys/utsname.h>
30 #include <sys/wait.h>
31 #include <sysexits.h>
32 #include <unistd.h>
33
34 #include "BpfSyscallWrappers.h"
35 #include "bpf/BpfUtils.h"
36 #include "bpf_map_def.h"
37
38 #include <cstdlib>
39 #include <fstream>
40 #include <iostream>
41 #include <optional>
42 #include <string>
43 #include <unordered_map>
44 #include <vector>
45
46 #include <android-base/cmsg.h>
47 #include <android-base/file.h>
48 #include <android-base/logging.h>
49 #include <android-base/strings.h>
50 #include <android-base/unique_fd.h>
51
// Root of the BPF filesystem; every map/program pin location built in this
// file is a path underneath it.
#define BPF_FS_PATH "/sys/fs/bpf/"

// Byte size of the buffer handed to BPF_PROG_LOAD to receive the verifier log.
#define BPF_LOAD_LOG_SZ 0xfffff
56
57 using android::base::EndsWith;
58 using android::base::StartsWith;
59 using android::base::unique_fd;
60 using std::ifstream;
61 using std::ios;
62 using std::optional;
63 using std::strerror;
64 using std::string;
65 using std::vector;
66
67 namespace android {
68 namespace bpf {
69
// System page size, queried once during static initialization. Used below as
// the minimum max_entries for BPF_MAP_TYPE_RINGBUF maps.
static unsigned int page_size{static_cast<unsigned int>(getpagesize())};
71
/*
 * Derives the object name used in pin paths from the path of a .o file:
 * the basename with its final extension and any trailing '@<suffix>' removed,
 * e.g. '/some/dir/foo@1.o' -> 'foo' and 'bar.o' -> 'bar'.
 */
static string pathToObjName(const string &path) {
  // Basename: whatever follows the last slash ('foo@1.o' or 'bar.o'). A path
  // without any slash is already a bare filename.
  const size_t slashPos = path.rfind('/');
  const string base =
      (slashPos == string::npos) ? path : path.substr(slashPos + 1);

  // Drop everything from the last period onwards (the '.o' suffix), leaving
  // 'foo@1' or 'bar'. npos (no period) keeps the whole basename.
  const string stem = base.substr(0, base.rfind('.'));

  // Drop a potential '@1' style suffix so only 'foo' or 'bar' remains; the
  // suffix allows shipping duplicate programs (muxed on bpfloader version).
  return stem.substr(0, stem.rfind('@'));
}
84
85 typedef struct {
86 const char *name;
87 enum bpf_prog_type type;
88 } sectionType;
89
90 /*
91 * Map section name prefixes to program types, the section name will be:
92 * SECTION(<prefix>/<name-of-program>)
93 * For example:
94 * SECTION("tracepoint/sched_switch_func") where sched_switch_funcs
95 * is the name of the program, and tracepoint is the type.
96 *
97 * However, be aware that you should not be directly using the SECTION() macro.
98 * Instead use the DEFINE_(BPF|XDP)_(PROG|MAP)... & LICENSE/CRITICAL macros.
99 */
100 sectionType sectionNameTypes[] = {
101 {"kprobe/", BPF_PROG_TYPE_KPROBE},
102 {"kretprobe/", BPF_PROG_TYPE_KPROBE},
103 {"perf_event/", BPF_PROG_TYPE_PERF_EVENT},
104 {"skfilter/", BPF_PROG_TYPE_SOCKET_FILTER},
105 {"tracepoint/", BPF_PROG_TYPE_TRACEPOINT},
106 {"uprobe/", BPF_PROG_TYPE_KPROBE},
107 {"uretprobe/", BPF_PROG_TYPE_KPROBE},
108 };
109
110 typedef struct {
111 enum bpf_prog_type type;
112 string name;
113 vector<char> data;
114 vector<char> rel_data;
115 optional<struct bpf_prog_def> prog_def;
116
117 unique_fd prog_fd; /* fd after loading */
118 } codeSection;
119
/*
 * Reads the ELF file header from the very start of |elfFile| into |eh|.
 * Returns 0 on success, -1 when seeking or reading fails.
 */
static int readElfHeader(ifstream &elfFile, Elf64_Ehdr *eh) {
  elfFile.seekg(0);
  const bool rewound = !elfFile.fail();

  // Only attempt the read when the rewind succeeded.
  if (rewound && elfFile.read(reinterpret_cast<char *>(eh), sizeof(*eh)))
    return 0;

  return -1;
}
130
131 /* Reads all section header tables into an Shdr array */
readSectionHeadersAll(ifstream & elfFile,vector<Elf64_Shdr> & shTable)132 static int readSectionHeadersAll(ifstream &elfFile,
133 vector<Elf64_Shdr> &shTable) {
134 Elf64_Ehdr eh;
135 int ret = 0;
136
137 ret = readElfHeader(elfFile, &eh);
138 if (ret)
139 return ret;
140
141 elfFile.seekg(eh.e_shoff);
142 if (elfFile.fail())
143 return -1;
144
145 /* Read shdr table entries */
146 shTable.resize(eh.e_shnum);
147
148 if (!elfFile.read((char *)shTable.data(), (eh.e_shnum * eh.e_shentsize)))
149 return -ENOMEM;
150
151 return 0;
152 }
153
154 /* Read a section by its index - for ex to get sec hdr strtab blob */
readSectionByIdx(ifstream & elfFile,int id,vector<char> & sec)155 static int readSectionByIdx(ifstream &elfFile, int id, vector<char> &sec) {
156 vector<Elf64_Shdr> shTable;
157 int ret = readSectionHeadersAll(elfFile, shTable);
158 if (ret)
159 return ret;
160
161 elfFile.seekg(shTable[id].sh_offset);
162 if (elfFile.fail())
163 return -1;
164
165 sec.resize(shTable[id].sh_size);
166 if (!elfFile.read(sec.data(), shTable[id].sh_size))
167 return -1;
168
169 return 0;
170 }
171
172 /* Read whole section header string table */
readSectionHeaderStrtab(ifstream & elfFile,vector<char> & strtab)173 static int readSectionHeaderStrtab(ifstream &elfFile, vector<char> &strtab) {
174 Elf64_Ehdr eh;
175 int ret = readElfHeader(elfFile, &eh);
176 if (ret)
177 return ret;
178
179 ret = readSectionByIdx(elfFile, eh.e_shstrndx, strtab);
180 if (ret)
181 return ret;
182
183 return 0;
184 }
185
186 /* Get name from offset in strtab */
getSymName(ifstream & elfFile,int nameOff,string & name)187 static int getSymName(ifstream &elfFile, int nameOff, string &name) {
188 int ret;
189 vector<char> secStrTab;
190
191 ret = readSectionHeaderStrtab(elfFile, secStrTab);
192 if (ret)
193 return ret;
194
195 if (nameOff >= (int)secStrTab.size())
196 return -1;
197
198 name = string((char *)secStrTab.data() + nameOff);
199 return 0;
200 }
201
202 /* Reads a full section by name - example to get the GPL license */
readSectionByName(const char * name,ifstream & elfFile,vector<char> & data)203 static int readSectionByName(const char *name, ifstream &elfFile,
204 vector<char> &data) {
205 vector<char> secStrTab;
206 vector<Elf64_Shdr> shTable;
207 int ret;
208
209 ret = readSectionHeadersAll(elfFile, shTable);
210 if (ret)
211 return ret;
212
213 ret = readSectionHeaderStrtab(elfFile, secStrTab);
214 if (ret)
215 return ret;
216
217 for (int i = 0; i < (int)shTable.size(); i++) {
218 char *secname = secStrTab.data() + shTable[i].sh_name;
219 if (!secname)
220 continue;
221
222 if (!strcmp(secname, name)) {
223 vector<char> dataTmp;
224 dataTmp.resize(shTable[i].sh_size);
225
226 elfFile.seekg(shTable[i].sh_offset);
227 if (elfFile.fail())
228 return -1;
229
230 if (!elfFile.read((char *)dataTmp.data(), shTable[i].sh_size))
231 return -1;
232
233 data = dataTmp;
234 return 0;
235 }
236 }
237 return -2;
238 }
239
readSectionUint(const char * name,ifstream & elfFile,unsigned int defVal)240 unsigned int readSectionUint(const char *name, ifstream &elfFile,
241 unsigned int defVal) {
242 vector<char> theBytes;
243 int ret = readSectionByName(name, elfFile, theBytes);
244 if (ret) {
245 ALOGV("Couldn't find section %s (defaulting to %u [0x%x]).", name, defVal,
246 defVal);
247 return defVal;
248 } else if (theBytes.size() < sizeof(unsigned int)) {
249 ALOGE("Section %s too short (defaulting to %u [0x%x]).", name, defVal,
250 defVal);
251 return defVal;
252 } else {
253 // decode first 4 bytes as LE32 uint, there will likely be more bytes due to
254 // alignment.
255 unsigned int value = static_cast<unsigned char>(theBytes[3]);
256 value <<= 8;
257 value += static_cast<unsigned char>(theBytes[2]);
258 value <<= 8;
259 value += static_cast<unsigned char>(theBytes[1]);
260 value <<= 8;
261 value += static_cast<unsigned char>(theBytes[0]);
262 ALOGV("Section %s value is %u [0x%x]", name, value, value);
263 return value;
264 }
265 }
266
readSectionByType(ifstream & elfFile,int type,vector<char> & data)267 static int readSectionByType(ifstream &elfFile, int type, vector<char> &data) {
268 int ret;
269 vector<Elf64_Shdr> shTable;
270
271 ret = readSectionHeadersAll(elfFile, shTable);
272 if (ret)
273 return ret;
274
275 for (int i = 0; i < (int)shTable.size(); i++) {
276 if ((int)shTable[i].sh_type != type)
277 continue;
278
279 vector<char> dataTmp;
280 dataTmp.resize(shTable[i].sh_size);
281
282 elfFile.seekg(shTable[i].sh_offset);
283 if (elfFile.fail())
284 return -1;
285
286 if (!elfFile.read((char *)dataTmp.data(), shTable[i].sh_size))
287 return -1;
288
289 data = dataTmp;
290 return 0;
291 }
292 return -2;
293 }
294
symCompare(Elf64_Sym a,Elf64_Sym b)295 static bool symCompare(Elf64_Sym a, Elf64_Sym b) {
296 return (a.st_value < b.st_value);
297 }
298
readSymTab(ifstream & elfFile,int sort,vector<Elf64_Sym> & data)299 static int readSymTab(ifstream &elfFile, int sort, vector<Elf64_Sym> &data) {
300 int ret, numElems;
301 Elf64_Sym *buf;
302 vector<char> secData;
303
304 ret = readSectionByType(elfFile, SHT_SYMTAB, secData);
305 if (ret)
306 return ret;
307
308 buf = (Elf64_Sym *)secData.data();
309 numElems = (secData.size() / sizeof(Elf64_Sym));
310 data.assign(buf, buf + numElems);
311
312 if (sort)
313 std::sort(data.begin(), data.end(), symCompare);
314 return 0;
315 }
316
getFuseProgType()317 static enum bpf_prog_type getFuseProgType() {
318 int result = BPF_PROG_TYPE_UNSPEC;
319 ifstream("/sys/fs/fuse/bpf_prog_type_fuse") >> result;
320 return static_cast<bpf_prog_type>(result);
321 }
322
getSectionType(string & name)323 static enum bpf_prog_type getSectionType(string &name) {
324 for (auto &snt : sectionNameTypes)
325 if (StartsWith(name, snt.name))
326 return snt.type;
327
328 // TODO Remove this code when fuse-bpf is upstream and this BPF_PROG_TYPE_FUSE
329 // is fixed
330 if (StartsWith(name, "fuse/"))
331 return getFuseProgType();
332
333 return BPF_PROG_TYPE_UNSPEC;
334 }
335
getSectionName(enum bpf_prog_type type)336 static string getSectionName(enum bpf_prog_type type) {
337 for (auto &snt : sectionNameTypes)
338 if (snt.type == type)
339 return string(snt.name);
340
341 return "UNKNOWN SECTION NAME " + std::to_string(type);
342 }
343
readProgDefs(ifstream & elfFile,vector<struct bpf_prog_def> & pd)344 static int readProgDefs(ifstream &elfFile, vector<struct bpf_prog_def> &pd) {
345 vector<char> pdData;
346 int ret = readSectionByName("progs", elfFile, pdData);
347 if (ret)
348 return ret;
349
350 if (pdData.size() % sizeof(struct bpf_prog_def)) {
351 ALOGE("readProgDefs failed due to improper sized progs section, %zu %% %zu "
352 "!= 0",
353 pdData.size(), sizeof(struct bpf_prog_def));
354 return -1;
355 };
356
357 pd.resize(pdData.size() / sizeof(struct bpf_prog_def));
358 memcpy(pd.data(), pdData.data(), pdData.size());
359 return 0;
360 }
361
getSectionSymNames(ifstream & elfFile,const string & sectionName,vector<string> & names,optional<unsigned> symbolType=std::nullopt)362 static int getSectionSymNames(ifstream &elfFile, const string §ionName,
363 vector<string> &names,
364 optional<unsigned> symbolType = std::nullopt) {
365 int ret;
366 string name;
367 vector<Elf64_Sym> symtab;
368 vector<Elf64_Shdr> shTable;
369
370 ret = readSymTab(elfFile, 1 /* sort */, symtab);
371 if (ret)
372 return ret;
373
374 /* Get index of section */
375 ret = readSectionHeadersAll(elfFile, shTable);
376 if (ret)
377 return ret;
378
379 int sec_idx = -1;
380 for (int i = 0; i < (int)shTable.size(); i++) {
381 ret = getSymName(elfFile, shTable[i].sh_name, name);
382 if (ret)
383 return ret;
384
385 if (!name.compare(sectionName)) {
386 sec_idx = i;
387 break;
388 }
389 }
390
391 /* No section found with matching name*/
392 if (sec_idx == -1) {
393 ALOGW("No %s section could be found in elf object", sectionName.c_str());
394 return -1;
395 }
396
397 for (int i = 0; i < (int)symtab.size(); i++) {
398 if (symbolType.has_value() && ELF_ST_TYPE(symtab[i].st_info) != symbolType)
399 continue;
400
401 if (symtab[i].st_shndx == sec_idx) {
402 string s;
403 ret = getSymName(elfFile, symtab[i].st_name, s);
404 if (ret)
405 return ret;
406 names.push_back(s);
407 }
408 }
409
410 return 0;
411 }
412
IsAllowed(bpf_prog_type type,const bpf_prog_type * allowed,size_t numAllowed)413 static bool IsAllowed(bpf_prog_type type, const bpf_prog_type *allowed,
414 size_t numAllowed) {
415 if (allowed == nullptr)
416 return true;
417
418 for (size_t i = 0; i < numAllowed; i++) {
419 if (allowed[i] == BPF_PROG_TYPE_UNSPEC) {
420 if (type == getFuseProgType())
421 return true;
422 } else if (type == allowed[i])
423 return true;
424 }
425
426 return false;
427 }
428
429 /* Read a section by its index - for ex to get sec hdr strtab blob */
readCodeSections(ifstream & elfFile,vector<codeSection> & cs,const bpf_prog_type * allowed,size_t numAllowed)430 static int readCodeSections(ifstream &elfFile, vector<codeSection> &cs,
431 const bpf_prog_type *allowed, size_t numAllowed) {
432 vector<Elf64_Shdr> shTable;
433 int entries, ret = 0;
434
435 ret = readSectionHeadersAll(elfFile, shTable);
436 if (ret)
437 return ret;
438 entries = shTable.size();
439
440 vector<struct bpf_prog_def> pd;
441 ret = readProgDefs(elfFile, pd);
442 if (ret)
443 return ret;
444 vector<string> progDefNames;
445 ret = getSectionSymNames(elfFile, "progs", progDefNames);
446 if (!pd.empty() && ret)
447 return ret;
448
449 for (int i = 0; i < entries; i++) {
450 string name;
451 codeSection cs_temp;
452 cs_temp.type = BPF_PROG_TYPE_UNSPEC;
453
454 ret = getSymName(elfFile, shTable[i].sh_name, name);
455 if (ret)
456 return ret;
457
458 enum bpf_prog_type ptype = getSectionType(name);
459
460 if (ptype == BPF_PROG_TYPE_UNSPEC)
461 continue;
462
463 if (!IsAllowed(ptype, allowed, numAllowed)) {
464 ALOGE("Program type %s not permitted here",
465 getSectionName(ptype).c_str());
466 return -1;
467 }
468
469 string oldName = name;
470
471 // convert all slashes to underscores
472 std::replace(name.begin(), name.end(), '/', '_');
473
474 cs_temp.type = ptype;
475 cs_temp.name = name;
476
477 ret = readSectionByIdx(elfFile, i, cs_temp.data);
478 if (ret)
479 return ret;
480 ALOGV("Loaded code section %d (%s)", i, name.c_str());
481
482 vector<string> csSymNames;
483 ret = getSectionSymNames(elfFile, oldName, csSymNames, STT_FUNC);
484 if (ret || !csSymNames.size())
485 return ret;
486 for (size_t i = 0; i < progDefNames.size(); ++i) {
487 if (!progDefNames[i].compare(csSymNames[0] + "_def")) {
488 cs_temp.prog_def = pd[i];
489 break;
490 }
491 }
492
493 /* Check for rel section */
494 if (cs_temp.data.size() > 0 && i < entries) {
495 ret = getSymName(elfFile, shTable[i + 1].sh_name, name);
496 if (ret)
497 return ret;
498
499 if (name == (".rel" + oldName)) {
500 ret = readSectionByIdx(elfFile, i + 1, cs_temp.rel_data);
501 if (ret)
502 return ret;
503 ALOGV("Loaded relo section %d (%s)", i, name.c_str());
504 }
505 }
506
507 if (cs_temp.data.size() > 0) {
508 cs.push_back(std::move(cs_temp));
509 ALOGV("Adding section %d to cs list", i);
510 }
511 }
512 return 0;
513 }
514
getSymNameByIdx(ifstream & elfFile,int index,string & name)515 static int getSymNameByIdx(ifstream &elfFile, int index, string &name) {
516 vector<Elf64_Sym> symtab;
517 int ret = 0;
518
519 ret = readSymTab(elfFile, 0 /* !sort */, symtab);
520 if (ret)
521 return ret;
522
523 if (index >= (int)symtab.size())
524 return -1;
525
526 return getSymName(elfFile, symtab[index].st_name, name);
527 }
528
mapMatchesExpectations(const unique_fd & fd,const string & mapName,const struct bpf_map_def & mapDef,const enum bpf_map_type type)529 static bool mapMatchesExpectations(const unique_fd &fd, const string &mapName,
530 const struct bpf_map_def &mapDef,
531 const enum bpf_map_type type) {
532 // Assuming fd is a valid Bpf Map file descriptor then
533 // all the following should always succeed on a 4.14+ kernel.
534 // If they somehow do fail, they'll return -1 (and set errno),
535 // which should then cause (among others) a key_size mismatch.
536 int fd_type = bpfGetFdMapType(fd);
537 int fd_key_size = bpfGetFdKeySize(fd);
538 int fd_value_size = bpfGetFdValueSize(fd);
539 int fd_max_entries = bpfGetFdMaxEntries(fd);
540 int fd_map_flags = bpfGetFdMapFlags(fd);
541
542 // DEVMAPs are readonly from the bpf program side's point of view, as such
543 // the kernel in kernel/bpf/devmap.c dev_map_init_map() will set the flag
544 int desired_map_flags = (int)mapDef.map_flags;
545 if (type == BPF_MAP_TYPE_DEVMAP || type == BPF_MAP_TYPE_DEVMAP_HASH)
546 desired_map_flags |= BPF_F_RDONLY_PROG;
547
548 // The .h file enforces that this is a power of two, and page size will
549 // also always be a power of two, so this logic is actually enough to
550 // force it to be a multiple of the page size, as required by the kernel.
551 unsigned int desired_max_entries = mapDef.max_entries;
552 if (type == BPF_MAP_TYPE_RINGBUF) {
553 if (desired_max_entries < page_size)
554 desired_max_entries = page_size;
555 }
556
557 // The following checks should *never* trigger, if one of them somehow does,
558 // it probably means a bpf .o file has been changed/replaced at runtime
559 // and bpfloader was manually rerun (normally it should only run *once*
560 // early during the boot process).
561 // Another possibility is that something is misconfigured in the code:
562 // most likely a shared map is declared twice differently.
563 // But such a change should never be checked into the source tree...
564 if ((fd_type == type) && (fd_key_size == (int)mapDef.key_size) &&
565 (fd_value_size == (int)mapDef.value_size) &&
566 (fd_max_entries == (int)desired_max_entries) &&
567 (fd_map_flags == desired_map_flags)) {
568 return true;
569 }
570
571 ALOGE("bpf map name %s mismatch: desired/found: "
572 "type:%d/%d key:%u/%d value:%u/%d entries:%u/%d flags:%u/%d",
573 mapName.c_str(), type, fd_type, mapDef.key_size, fd_key_size,
574 mapDef.value_size, fd_value_size, mapDef.max_entries, fd_max_entries,
575 desired_map_flags, fd_map_flags);
576 return false;
577 }
578
createMaps(const char * elfPath,ifstream & elfFile,vector<unique_fd> & mapFds,const char * prefix)579 static int createMaps(const char *elfPath, ifstream &elfFile,
580 vector<unique_fd> &mapFds, const char *prefix) {
581 int ret;
582 vector<char> mdData;
583 vector<struct bpf_map_def> md;
584 vector<string> mapNames;
585 string objName = pathToObjName(string(elfPath));
586
587 ret = readSectionByName("maps", elfFile, mdData);
588 if (ret == -2)
589 return 0; // no maps to read
590 if (ret)
591 return ret;
592
593 if (mdData.size() % sizeof(struct bpf_map_def)) {
594 ALOGE(
595 "createMaps failed due to improper sized maps section, %zu %% %zu != 0",
596 mdData.size(), sizeof(struct bpf_map_def));
597 return -1;
598 }
599 md.resize(mdData.size() / sizeof(struct bpf_map_def));
600 memcpy(md.data(), mdData.data(), mdData.size());
601
602 ret = getSectionSymNames(elfFile, "maps", mapNames);
603 if (ret)
604 return ret;
605
606 unsigned kvers = kernelVersion();
607
608 for (int i = 0; i < (int)mapNames.size(); i++) {
609 if (md[i].zero != 0)
610 abort();
611
612 if (kvers < md[i].min_kver) {
613 ALOGD("skipping map %s which requires kernel version 0x%x >= 0x%x",
614 mapNames[i].c_str(), kvers, md[i].min_kver);
615 mapFds.push_back(unique_fd());
616 continue;
617 }
618
619 if (kvers >= md[i].max_kver) {
620 ALOGD("skipping map %s which requires kernel version 0x%x < 0x%x",
621 mapNames[i].c_str(), kvers, md[i].max_kver);
622 mapFds.push_back(unique_fd());
623 continue;
624 }
625
626 enum bpf_map_type type = md[i].type;
627 if (type == BPF_MAP_TYPE_DEVMAP_HASH && !isAtLeastKernelVersion(5, 4, 0)) {
628 // On Linux Kernels older than 5.4 this map type doesn't exist, but it can
629 // kind of be approximated: HASH has the same userspace visible api.
630 // However it cannot be used by ebpf programs in the same way.
631 // Since bpf_redirect_map() only requires 4.14, a program using a
632 // DEVMAP_HASH map would fail to load (due to trying to redirect to a HASH
633 // instead of DEVMAP_HASH). One must thus tag any BPF_MAP_TYPE_DEVMAP_HASH
634 // + bpf_redirect_map() using programs as being 5.4+...
635 type = BPF_MAP_TYPE_HASH;
636 }
637
638 // The .h file enforces that this is a power of two, and page size will
639 // also always be a power of two, so this logic is actually enough to
640 // force it to be a multiple of the page size, as required by the kernel.
641 unsigned int max_entries = md[i].max_entries;
642 if (type == BPF_MAP_TYPE_RINGBUF) {
643 if (max_entries < page_size)
644 max_entries = page_size;
645 }
646
647 // Format of pin location is /sys/fs/bpf/<prefix>map_<objName>_<mapName>
648 // except that maps shared across .o's have empty <objName>
649 // Note: <objName> refers to the extension-less basename of the .o file
650 // (without @ suffix).
651 string mapPinLoc = string(BPF_FS_PATH) + prefix + "map_" +
652 (md[i].shared ? "" : objName) + "_" + mapNames[i];
653 bool reuse = false;
654 unique_fd fd;
655 int saved_errno;
656
657 if (access(mapPinLoc.c_str(), F_OK) == 0) {
658 fd.reset(mapRetrieveRO(mapPinLoc.c_str()));
659 saved_errno = errno;
660 ALOGV("bpf_create_map reusing map %s, ret: %d", mapNames[i].c_str(),
661 fd.get());
662 reuse = true;
663 } else {
664 union bpf_attr req = {
665 .map_type = type,
666 .key_size = md[i].key_size,
667 .value_size = md[i].value_size,
668 .max_entries = max_entries,
669 .map_flags = md[i].map_flags,
670 };
671 strlcpy(req.map_name, mapNames[i].c_str(), sizeof(req.map_name));
672 fd.reset(bpf(BPF_MAP_CREATE, req));
673 saved_errno = errno;
674 ALOGV("bpf_create_map name %s, ret: %d", mapNames[i].c_str(), fd.get());
675 }
676
677 if (!fd.ok())
678 return -saved_errno;
679
680 // When reusing a pinned map, we need to check the map type/sizes/etc match,
681 // but for safety (since reuse code path is rare) run these checks even if
682 // we just created it. We assume failure is due to pinned map mismatch,
683 // hence the 'NOT UNIQUE' return code.
684 if (!mapMatchesExpectations(fd, mapNames[i], md[i], type))
685 return -ENOTUNIQ;
686
687 if (!reuse) {
688 ret = bpfFdPin(fd, mapPinLoc.c_str());
689 if (ret) {
690 int err = errno;
691 ALOGE("pin %s -> %d [%d:%s]", mapPinLoc.c_str(), ret, err,
692 strerror(err));
693 return -err;
694 }
695 ret = chmod(mapPinLoc.c_str(), md[i].mode);
696 if (ret) {
697 int err = errno;
698 ALOGE("chmod(%s, 0%o) = %d [%d:%s]", mapPinLoc.c_str(), md[i].mode, ret,
699 err, strerror(err));
700 return -err;
701 }
702 ret = chown(mapPinLoc.c_str(), (uid_t)md[i].uid, (gid_t)md[i].gid);
703 if (ret) {
704 int err = errno;
705 ALOGE("chown(%s, %u, %u) = %d [%d:%s]", mapPinLoc.c_str(), md[i].uid,
706 md[i].gid, ret, err, strerror(err));
707 return -err;
708 }
709 }
710
711 int mapId = bpfGetFdMapId(fd);
712 if (mapId == -1) {
713 ALOGE("bpfGetFdMapId failed, ret: %d [%d]", mapId, errno);
714 } else {
715 ALOGD("map %s id %d", mapPinLoc.c_str(), mapId);
716 }
717
718 mapFds.push_back(std::move(fd));
719 }
720
721 return ret;
722 }
723
applyRelo(void * insnsPtr,Elf64_Addr offset,int fd)724 static void applyRelo(void *insnsPtr, Elf64_Addr offset, int fd) {
725 int insnIndex;
726 struct bpf_insn *insn, *insns;
727
728 insns = (struct bpf_insn *)(insnsPtr);
729
730 insnIndex = offset / sizeof(struct bpf_insn);
731 insn = &insns[insnIndex];
732
733 // Occasionally might be useful for relocation debugging, but pretty spammy
734 if (0) {
735 ALOGV("applying relo to instruction at byte offset: %llu, "
736 "insn offset %d, insn %llx",
737 (unsigned long long)offset, insnIndex, *(unsigned long long *)insn);
738 }
739
740 if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) {
741 ALOGE("invalid relo for insn %d: code 0x%x", insnIndex, insn->code);
742 return;
743 }
744
745 insn->imm = fd;
746 insn->src_reg = BPF_PSEUDO_MAP_FD;
747 }
748
applyMapRelo(ifstream & elfFile,vector<unique_fd> & mapFds,vector<codeSection> & cs)749 static void applyMapRelo(ifstream &elfFile, vector<unique_fd> &mapFds,
750 vector<codeSection> &cs) {
751 vector<string> mapNames;
752
753 int ret = getSectionSymNames(elfFile, "maps", mapNames);
754 if (ret)
755 return;
756
757 for (int k = 0; k != (int)cs.size(); k++) {
758 Elf64_Rel *rel = (Elf64_Rel *)(cs[k].rel_data.data());
759 int n_rel = cs[k].rel_data.size() / sizeof(*rel);
760
761 for (int i = 0; i < n_rel; i++) {
762 int symIndex = ELF64_R_SYM(rel[i].r_info);
763 string symName;
764
765 ret = getSymNameByIdx(elfFile, symIndex, symName);
766 if (ret)
767 return;
768
769 /* Find the map fd and apply relo */
770 for (int j = 0; j < (int)mapNames.size(); j++) {
771 if (!mapNames[j].compare(symName)) {
772 applyRelo(cs[k].data.data(), rel[i].r_offset, mapFds[j]);
773 break;
774 }
775 }
776 }
777 }
778 }
779
loadCodeSections(const char * elfPath,vector<codeSection> & cs,const string & license,const char * prefix)780 static int loadCodeSections(const char *elfPath, vector<codeSection> &cs,
781 const string &license, const char *prefix) {
782 unsigned kvers = kernelVersion();
783
784 if (!kvers) {
785 ALOGE("unable to get kernel version");
786 return -EINVAL;
787 }
788
789 string objName = pathToObjName(string(elfPath));
790
791 for (int i = 0; i < (int)cs.size(); i++) {
792 unique_fd &fd = cs[i].prog_fd;
793 int ret;
794 string name = cs[i].name;
795
796 if (!cs[i].prog_def.has_value()) {
797 ALOGE("[%d] '%s' missing program definition! bad bpf.o build?", i,
798 name.c_str());
799 return -EINVAL;
800 }
801
802 unsigned min_kver = cs[i].prog_def->min_kver;
803 unsigned max_kver = cs[i].prog_def->max_kver;
804 if (kvers < min_kver || kvers >= max_kver) {
805 ALOGD(
806 "skipping program cs[%d].name:%s min_kver:%x max_kver:%x (kvers:%x)",
807 i, name.c_str(), min_kver, max_kver, kvers);
808 continue;
809 }
810
811 // strip any potential $foo suffix
812 // this can be used to provide duplicate programs
813 // conditionally loaded based on running kernel version
814 name = name.substr(0, name.find_last_of('$'));
815
816 bool reuse = false;
817 // Format of pin location is
818 // /sys/fs/bpf/<prefix>prog_<objName>_<progName>
819 string progPinLoc =
820 string(BPF_FS_PATH) + prefix + "prog_" + objName + '_' + string(name);
821 if (access(progPinLoc.c_str(), F_OK) == 0) {
822 fd.reset(retrieveProgram(progPinLoc.c_str()));
823 ALOGV("New bpf prog load reusing prog %s, ret: %d (%s)",
824 progPinLoc.c_str(), fd.get(),
825 (!fd.ok() ? std::strerror(errno) : "no error"));
826 reuse = true;
827 } else {
828 vector<char> log_buf(BPF_LOAD_LOG_SZ, 0);
829
830 union bpf_attr req = {
831 .prog_type = cs[i].type,
832 .kern_version = kvers,
833 .license = ptr_to_u64(license.c_str()),
834 .insns = ptr_to_u64(cs[i].data.data()),
835 .insn_cnt =
836 static_cast<__u32>(cs[i].data.size() / sizeof(struct bpf_insn)),
837 .log_level = 1,
838 .log_buf = ptr_to_u64(log_buf.data()),
839 .log_size = static_cast<__u32>(log_buf.size()),
840 };
841 strlcpy(req.prog_name, cs[i].name.c_str(), sizeof(req.prog_name));
842 fd.reset(bpf(BPF_PROG_LOAD, req));
843
844 if (!fd.ok()) {
845 ALOGW("BPF_PROG_LOAD call for %s (%s) returned fd: %d (%s)", elfPath,
846 cs[i].name.c_str(), fd.get(), std::strerror(errno));
847
848 vector<string> lines = android::base::Split(log_buf.data(), "\n");
849
850 ALOGW("BPF_PROG_LOAD - BEGIN log_buf contents:");
851 for (const auto &line : lines)
852 ALOGW("%s", line.c_str());
853 ALOGW("BPF_PROG_LOAD - END log_buf contents.");
854
855 if (cs[i].prog_def->optional) {
856 ALOGW("failed program is marked optional - continuing...");
857 continue;
858 }
859 ALOGE("non-optional program failed to load.");
860 }
861 }
862
863 if (!fd.ok())
864 return fd.get();
865
866 if (!reuse) {
867 ret = bpfFdPin(fd, progPinLoc.c_str());
868 if (ret) {
869 int err = errno;
870 ALOGE("create %s -> %d [%d:%s]", progPinLoc.c_str(), ret, err,
871 strerror(err));
872 return -err;
873 }
874 if (chmod(progPinLoc.c_str(), 0440)) {
875 int err = errno;
876 ALOGE("chmod %s 0440 -> [%d:%s]", progPinLoc.c_str(), err,
877 strerror(err));
878 return -err;
879 }
880 if (chown(progPinLoc.c_str(), (uid_t)cs[i].prog_def->uid,
881 (gid_t)cs[i].prog_def->gid)) {
882 int err = errno;
883 ALOGE("chown %s %d %d -> [%d:%s]", progPinLoc.c_str(),
884 cs[i].prog_def->uid, cs[i].prog_def->gid, err, strerror(err));
885 return -err;
886 }
887 }
888
889 int progId = bpfGetFdProgId(fd);
890 if (progId == -1) {
891 ALOGE("bpfGetFdProgId failed, ret: %d [%d]", progId, errno);
892 } else {
893 ALOGD("prog %s id %d", progPinLoc.c_str(), progId);
894 }
895 }
896
897 return 0;
898 }
899
900 struct Location {
901 const char *const dir = "";
902 const char *const prefix = "";
903 const bpf_prog_type *allowedProgTypes = nullptr;
904 size_t allowedProgTypesLength = 0;
905 };
906
loadProg(const char * elfPath,bool * isCritical,const Location & location)907 int loadProg(const char *elfPath, bool *isCritical, const Location &location) {
908 vector<char> license;
909 vector<char> critical;
910 vector<codeSection> cs;
911 vector<unique_fd> mapFds;
912 int ret;
913
914 if (!isCritical)
915 return -1;
916 *isCritical = false;
917
918 ifstream elfFile(elfPath, ios::in | ios::binary);
919 if (!elfFile.is_open())
920 return -1;
921
922 ret = readSectionByName("critical", elfFile, critical);
923 *isCritical = !ret;
924
925 ret = readSectionByName("license", elfFile, license);
926 if (ret) {
927 ALOGE("Couldn't find license in %s", elfPath);
928 return ret;
929 }
930
931 ALOGI("UprobeStatsBpfLoad loading %s%s ELF object %s with license %s",
932 *isCritical ? "critical for " : "optional",
933 *isCritical ? (char *)critical.data() : "", elfPath,
934 (char *)license.data());
935
936 ret = readCodeSections(elfFile, cs, location.allowedProgTypes,
937 location.allowedProgTypesLength);
938 if (ret) {
939 ALOGE("Couldn't read all code sections in %s", elfPath);
940 return ret;
941 }
942
943 ret = createMaps(elfPath, elfFile, mapFds, location.prefix);
944 if (ret) {
945 ALOGE("Failed to create maps: (ret=%d) in %s", ret, elfPath);
946 return ret;
947 }
948
949 for (int i = 0; i < (int)mapFds.size(); i++)
950 ALOGV("map_fd found at %d is %d in %s", i, mapFds[i].get(), elfPath);
951
952 applyMapRelo(elfFile, mapFds, cs);
953
954 ret = loadCodeSections(elfPath, cs, string(license.data()), location.prefix);
955 if (ret)
956 ALOGE("Failed to load programs, loadCodeSections ret=%d", ret);
957
958 return ret;
959 }
960
961 // Networking-related program types are limited to the Tethering Apex
962 // to prevent things from breaking due to conflicts on mainline updates
963 // (exception made for socket filters, ie. xt_bpf for potential use in iptables,
964 // or for attaching to sockets directly)
965 constexpr bpf_prog_type kPlatformAllowedProgTypes[] = {
966 BPF_PROG_TYPE_KPROBE, BPF_PROG_TYPE_PERF_EVENT,
967 BPF_PROG_TYPE_SOCKET_FILTER, BPF_PROG_TYPE_TRACEPOINT,
968 BPF_PROG_TYPE_UNSPEC, // Will be replaced with fuse bpf program type
969 };
970
971 constexpr bpf_prog_type kMemEventsAllowedProgTypes[] = {
972 BPF_PROG_TYPE_TRACEPOINT,
973 BPF_PROG_TYPE_SOCKET_FILTER,
974 };
975
976 constexpr bpf_prog_type kUprobestatsAllowedProgTypes[] = {
977 BPF_PROG_TYPE_KPROBE,
978 };
979
980 // see b/162057235. For arbitrary program types, the concern is that due to the
981 // lack of SELinux access controls over BPF program attachpoints, we have no way
982 // to control the attachment of programs to shared resources (or to detect when
983 // a shared resource has one BPF program replace another that is attached there)
984 constexpr bpf_prog_type kVendorAllowedProgTypes[] = {
985 BPF_PROG_TYPE_SOCKET_FILTER,
986 };
987
988 const Location locations[] = {
989 // uprobestats
990 {
991 .dir = "/apex/com.android.uprobestats/etc/bpf/uprobestats/",
992 .prefix = "uprobestats/",
993 .allowedProgTypes = kUprobestatsAllowedProgTypes,
994 .allowedProgTypesLength = arraysize(kUprobestatsAllowedProgTypes),
995 },
996 };
997
loadAllElfObjects(const Location & location)998 int loadAllElfObjects(const Location &location) {
999 int retVal = 0;
1000 DIR *dir;
1001 struct dirent *ent;
1002
1003 if ((dir = opendir(location.dir)) != NULL) {
1004 while ((ent = readdir(dir)) != NULL) {
1005 string s = ent->d_name;
1006 if (!EndsWith(s, ".o"))
1007 continue;
1008
1009 string progPath(location.dir);
1010 progPath += s;
1011
1012 bool critical;
1013 int ret = loadProg(progPath.c_str(), &critical, location);
1014 if (ret) {
1015 if (critical)
1016 retVal = ret;
1017 ALOGE("Failed to load object: %s, ret: %s", progPath.c_str(),
1018 strerror(-ret));
1019 } else {
1020 ALOGV("Loaded object: %s", progPath.c_str());
1021 }
1022 }
1023 closedir(dir);
1024 }
1025 return retVal;
1026 }
1027
createSysFsBpfSubDir(const char * const prefix)1028 int createSysFsBpfSubDir(const char *const prefix) {
1029 if (*prefix) {
1030 mode_t prevUmask = umask(0);
1031
1032 string s = "/sys/fs/bpf/";
1033 s += prefix;
1034
1035 errno = 0;
1036 int ret = mkdir(s.c_str(), S_ISVTX | S_IRWXU | S_IRWXG | S_IRWXO);
1037 if (ret && errno != EEXIST) {
1038 const int err = errno;
1039 ALOGE("Failed to create directory: %s, ret: %s", s.c_str(),
1040 strerror(err));
1041 return -err;
1042 }
1043
1044 umask(prevUmask);
1045 }
1046 return 0;
1047 }
1048
1049 } // namespace bpf
1050 } // namespace android
1051
1052 // ----- extern C stuff for rust below here -----
1053
initLogging()1054 void initLogging() {
1055 // since we only ever get called from mainline NetBpfLoad
1056 // (see packages/modules/Connectivity/netbpfload/NetBpfLoad.cpp around line
1057 // 516) and there no arguments, so we can just pretend/assume this is the
1058 // case.
1059 const char *argv[] = {"/system/bin/bpfloader", NULL};
1060 android::base::InitLogging(const_cast<char **>(argv),
1061 &android::base::KernelLogger);
1062 }
1063
createBpfFsSubDirectories()1064 bool createBpfFsSubDirectories() {
1065 for (const auto &location : android::bpf::locations) {
1066 if (android::bpf::createSysFsBpfSubDir(location.prefix)) {
1067 ALOGE("=== Failed to create subdir %s ===", location.prefix);
1068 return true;
1069 }
1070 }
1071 return false;
1072 }
1073
legacyBpfLoader()1074 void legacyBpfLoader() {
1075 // Load all ELF objects, create programs and maps, and pin them
1076 for (const auto &location : android::bpf::locations) {
1077 if (android::bpf::loadAllElfObjects(location)) {
1078 ALOGE("=== FAILURE LOADING BPF PROGRAMS FROM %s ===", location.dir);
1079 }
1080 }
1081 }
1082
load()1083 void load() {
1084 if (createBpfFsSubDirectories()) {
1085 return;
1086 }
1087 legacyBpfLoader();
1088 }
1089
// Binary that main() exec's into once this loader has finished its own work.
const char *const platformBpfLoader{"/system/bin/bpfloader"};
1091
main(int,char **,char * const envp[])1092 int main(int, char **, char *const envp[]) {
1093 initLogging();
1094 load();
1095
1096 const char *args[] = {
1097 platformBpfLoader,
1098 NULL,
1099 };
1100 execve(args[0], (char **)args, envp);
1101 ALOGE("FATAL: execve('%s'): %d[%s]", platformBpfLoader, errno,
1102 strerror(errno));
1103 return 1;
1104 }
1105