xref: /aosp_15_r20/external/sandboxed-api/sandboxed_api/sandbox2/policybuilder.cc (revision ec63e07ab9515d95e79c211197c445ef84cefa6a)
1 // Copyright 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "sandboxed_api/sandbox2/policybuilder.h"
16 
17 #include <fcntl.h>  // For the fcntl flags
18 #include <linux/bpf_common.h>
19 #include <linux/filter.h>
20 #include <linux/futex.h>
21 #include <linux/random.h>  // For GRND_NONBLOCK
22 #include <linux/seccomp.h>
23 #include <stddef.h>
24 #include <sys/ioctl.h>
25 #include <sys/mman.h>  // For mmap arguments
26 #include <sys/prctl.h>
27 #include <sys/socket.h>
28 #include <sys/stat.h>
29 #include <sys/statvfs.h>
30 #include <syscall.h>
31 #include <unistd.h>
32 
33 #include <array>
34 #include <cerrno>
35 #include <csignal>
36 #include <cstdint>
37 #include <cstdlib>
38 #include <deque>
39 #include <functional>
40 #include <iterator>
41 #include <limits>
42 #include <memory>
43 #include <optional>
44 #include <string>
45 #include <utility>
46 #include <vector>
47 
48 #include "absl/container/flat_hash_set.h"
49 #include "absl/log/log.h"
50 #include "absl/memory/memory.h"
51 #include "absl/status/status.h"
52 #include "absl/status/statusor.h"
53 #include "absl/strings/match.h"
54 #include "absl/strings/str_cat.h"
55 #include "absl/strings/string_view.h"
56 #include "absl/types/span.h"
57 #include "sandboxed_api/config.h"
58 #include "sandboxed_api/sandbox2/allow_all_syscalls.h"
59 #include "sandboxed_api/sandbox2/allow_unrestricted_networking.h"
60 #include "sandboxed_api/sandbox2/namespace.h"
61 #include "sandboxed_api/sandbox2/policy.h"
62 #include "sandboxed_api/sandbox2/syscall.h"
63 #include "sandboxed_api/sandbox2/trace_all_syscalls.h"
64 #include "sandboxed_api/sandbox2/util/bpf_helper.h"
65 #include "sandboxed_api/sandbox2/violation.pb.h"
66 #include "sandboxed_api/util/path.h"
67 
68 #if defined(SAPI_X86_64)
69 #include <asm/prctl.h>
70 #elif defined(SAPI_PPC64_LE)
71 #include <asm/termbits.h>  // On PPC, TCGETS macro needs termios
72 #endif
73 
74 #ifndef MAP_FIXED_NOREPLACE
75 #define MAP_FIXED_NOREPLACE 0x100000
76 #endif
77 #ifndef PR_SET_VMA
78 #define PR_SET_VMA 0x53564d41
79 #endif
80 #ifndef PR_SET_VMA_ANON_NAME
81 #define PR_SET_VMA_ANON_NAME 0
82 #endif
83 
84 namespace sandbox2 {
85 namespace {
86 
87 namespace file = ::sapi::file;
88 
// Syscall numbers that implement mmap() on the target architecture.
//
// The extent is deduced from the initializers on purpose. With a fixed extent
// of two, an architecture that defines only one of the numbers would get a
// value-initialized second element, i.e. syscall number 0, which is a real
// syscall (for example read on x86-64) and would silently be treated as an
// mmap syscall by everything that iterates over this array.
constexpr std::array kMmapSyscalls = {
#ifdef __NR_mmap2
    static_cast<uint32_t>(__NR_mmap2),
#endif
#ifdef __NR_mmap
    static_cast<uint32_t>(__NR_mmap),
#endif
};
97 
// Returns true when every jump offset encoded in `filter` is at most
// `max_jmp`. Instructions that are not of the jump class always pass.
bool CheckBpfBounds(const sock_filter& filter, size_t max_jmp) {
  if (BPF_CLASS(filter.code) != BPF_JMP) {
    return true;
  }
  // BPF_JA keeps its offset in `k`; all other jumps use the `jt`/`jf` pair.
  const bool is_unconditional = BPF_OP(filter.code) == BPF_JA;
  return is_unconditional ? filter.k <= max_jmp
                          : (filter.jt <= max_jmp && filter.jf <= max_jmp);
}
107 
IsOnReadOnlyDev(const std::string & path)108 bool IsOnReadOnlyDev(const std::string& path) {
109   struct statvfs vfs;
110   if (TEMP_FAILURE_RETRY(statvfs(path.c_str(), &vfs)) == -1) {
111     PLOG(ERROR) << "Could not statvfs: " << path.c_str();
112     return false;
113   }
114   return vfs.f_flag & ST_RDONLY;
115 }
116 
117 }  // namespace
118 
Allow(UnrestrictedNetworking tag)119 PolicyBuilder& PolicyBuilder::Allow(UnrestrictedNetworking tag) {
120   EnableNamespaces();  // NOLINT(clang-diagnostic-deprecated-declarations)
121   allow_unrestricted_networking_ = true;
122   return *this;
123 }
124 
AllowSyscall(uint32_t num)125 PolicyBuilder& PolicyBuilder::AllowSyscall(uint32_t num) {
126   if (handled_syscalls_.insert(num).second) {
127     user_policy_.insert(user_policy_.end(), {SYSCALL(num, ALLOW)});
128   }
129   return *this;
130 }
131 
AllowSyscalls(absl::Span<const uint32_t> nums)132 PolicyBuilder& PolicyBuilder::AllowSyscalls(absl::Span<const uint32_t> nums) {
133   for (auto num : nums) {
134     AllowSyscall(num);
135   }
136   return *this;
137 }
138 
BlockSyscallsWithErrno(absl::Span<const uint32_t> nums,int error)139 PolicyBuilder& PolicyBuilder::BlockSyscallsWithErrno(
140     absl::Span<const uint32_t> nums, int error) {
141   for (auto num : nums) {
142     BlockSyscallWithErrno(num, error);
143   }
144   return *this;
145 }
146 
BlockSyscallWithErrno(uint32_t num,int error)147 PolicyBuilder& PolicyBuilder::BlockSyscallWithErrno(uint32_t num, int error) {
148   if (handled_syscalls_.insert(num).second) {
149     user_policy_.insert(user_policy_.end(), {SYSCALL(num, ERRNO(error))});
150     if (num == __NR_bpf) {
151       user_policy_handles_bpf_ = true;
152     }
153     if (num == __NR_ptrace) {
154       user_policy_handles_ptrace_ = true;
155     }
156   }
157   return *this;
158 }
159 
OverridableBlockSyscallWithErrno(uint32_t num,int error)160 PolicyBuilder& PolicyBuilder::OverridableBlockSyscallWithErrno(uint32_t num,
161                                                                int error) {
162   overridable_policy_.insert(overridable_policy_.end(),
163                              {SYSCALL(num, ERRNO(error))});
164   return *this;
165 }
166 
AllowEpollWait()167 PolicyBuilder& PolicyBuilder::AllowEpollWait() {
168   return AllowSyscalls({
169 #ifdef __NR_epoll_wait
170       __NR_epoll_wait,
171 #endif
172 #ifdef __NR_epoll_pwait
173       __NR_epoll_pwait,
174 #endif
175 #ifdef __NR_epoll_pwait2
176       __NR_epoll_pwait2,
177 #endif
178   });
179 }
180 
AllowEpoll()181 PolicyBuilder& PolicyBuilder::AllowEpoll() {
182   AllowSyscalls({
183 #ifdef __NR_epoll_create
184       __NR_epoll_create,
185 #endif
186 #ifdef __NR_epoll_create1
187       __NR_epoll_create1,
188 #endif
189 #ifdef __NR_epoll_ctl
190       __NR_epoll_ctl,
191 #endif
192   });
193 
194   return AllowEpollWait();
195 }
196 
AllowInotifyInit()197 PolicyBuilder& PolicyBuilder::AllowInotifyInit() {
198   return AllowSyscalls({
199 #ifdef __NR_inotify_init
200       __NR_inotify_init,
201 #endif
202 #ifdef __NR_inotify_init1
203       __NR_inotify_init1,
204 #endif
205   });
206 }
207 
AllowSelect()208 PolicyBuilder& PolicyBuilder::AllowSelect() {
209   return AllowSyscalls({
210 #ifdef __NR_select
211       __NR_select,
212 #endif
213 #ifdef __NR_pselect6
214       __NR_pselect6,
215 #endif
216   });
217 }
218 
AllowExit()219 PolicyBuilder& PolicyBuilder::AllowExit() {
220   return AllowSyscalls({__NR_exit, __NR_exit_group});
221 }
222 
AllowScudoMalloc()223 PolicyBuilder& PolicyBuilder::AllowScudoMalloc() {
224   AllowTime();
225   AllowSyscalls({__NR_munmap, __NR_nanosleep});
226   AllowFutexOp(FUTEX_WAKE);
227   AllowLimitedMadvise();
228   AllowGetRandom();
229   AllowGetPIDs();
230   AllowWipeOnFork();
231 #ifdef __NR_open
232   OverridableBlockSyscallWithErrno(__NR_open, ENOENT);
233 #endif
234 #ifdef __NR_openat
235   OverridableBlockSyscallWithErrno(__NR_openat, ENOENT);
236 #endif
237 
238   return AddPolicyOnMmap([](bpf_labels& labels) -> std::vector<sock_filter> {
239     return {
240         ARG_32(2),  // prot
241         JEQ32(PROT_NONE, JUMP(&labels, prot_none)),
242         JNE32(PROT_READ | PROT_WRITE, JUMP(&labels, mmap_end)),
243 
244         // PROT_READ | PROT_WRITE
245         ARG_32(3),  // flags
246         BPF_STMT(BPF_ALU | BPF_AND | BPF_K,
247                  ~uint32_t{MAP_FIXED | MAP_NORESERVE}),
248         JEQ32(MAP_PRIVATE | MAP_ANONYMOUS, ALLOW),
249         JUMP(&labels, mmap_end),
250 
251         // PROT_NONE
252         LABEL(&labels, prot_none),
253         ARG_32(3),  // flags
254         JEQ32(MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, ALLOW),
255 
256         LABEL(&labels, mmap_end),
257     };
258   });
259 }
260 
AllowTcMalloc()261 PolicyBuilder& PolicyBuilder::AllowTcMalloc() {
262   AllowTime();
263   AllowRestartableSequences(kRequireFastFences);
264   AllowSyscalls(
265       {__NR_munmap, __NR_nanosleep, __NR_brk, __NR_mincore, __NR_membarrier});
266   AllowLimitedMadvise();
267   AllowPrctlSetVma();
268   AllowPoll();
269   AllowGetPIDs();
270 
271   AddPolicyOnSyscall(__NR_mprotect, {
272                                         ARG_32(2),
273                                         JEQ32(PROT_READ | PROT_WRITE, ALLOW),
274                                         JEQ32(PROT_NONE, ALLOW),
275                                     });
276 
277   return AddPolicyOnMmap([](bpf_labels& labels) -> std::vector<sock_filter> {
278     return {
279         ARG_32(2),  // prot
280         JEQ32(PROT_NONE, JUMP(&labels, prot_none)),
281         JNE32(PROT_READ | PROT_WRITE, JUMP(&labels, mmap_end)),
282 
283         // PROT_READ | PROT_WRITE
284         ARG_32(3),  // flags
285         JNE32(MAP_ANONYMOUS | MAP_PRIVATE, JUMP(&labels, mmap_end)),
286         ALLOW,
287 
288         // PROT_NONE
289         LABEL(&labels, prot_none),
290         ARG_32(3),  // flags
291         JEQ32(MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, ALLOW),
292         JEQ32(MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED_NOREPLACE, ALLOW),
293         JEQ32(MAP_ANONYMOUS | MAP_PRIVATE, ALLOW),
294 
295         LABEL(&labels, mmap_end),
296     };
297   });
298 }
299 
AllowSystemMalloc()300 PolicyBuilder& PolicyBuilder::AllowSystemMalloc() {
301   AllowSyscalls({__NR_munmap, __NR_brk});
302   AllowFutexOp(FUTEX_WAKE);
303   AddPolicyOnSyscall(__NR_mremap, {
304                                       ARG_32(3),
305                                       JEQ32(MREMAP_MAYMOVE, ALLOW),
306                                   });
307   return AddPolicyOnMmap([](bpf_labels& labels) -> std::vector<sock_filter> {
308     return {
309         ARG_32(2),  // prot
310         JEQ32(PROT_NONE, JUMP(&labels, prot_none)),
311         JNE32(PROT_READ | PROT_WRITE, JUMP(&labels, mmap_end)),
312 
313         // PROT_READ | PROT_WRITE
314         ARG_32(3),  // flags
315         JEQ32(MAP_ANONYMOUS | MAP_PRIVATE, ALLOW),
316 
317         // PROT_NONE
318         LABEL(&labels, prot_none),
319         ARG_32(3),  // flags
320         JEQ32(MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, ALLOW),
321 
322         LABEL(&labels, mmap_end),
323     };
324   });
325 
326   return *this;
327 }
328 
AllowLlvmSanitizers()329 PolicyBuilder& PolicyBuilder::AllowLlvmSanitizers() {
330   if constexpr (!sapi::sanitizers::IsAny()) {
331     return *this;
332   }
333   // *san use a custom allocator that runs mmap/unmap under the hood.  For
334   // example:
335   // https://github.com/llvm/llvm-project/blob/596d534ac3524052df210be8d3c01a33b2260a42/compiler-rt/lib/asan/asan_allocator.cpp#L980
336   // https://github.com/llvm/llvm-project/blob/62ec4ac90738a5f2d209ed28c822223e58aaaeb7/compiler-rt/lib/sanitizer_common/sanitizer_allocator_secondary.h#L98
337   AllowMmapWithoutExec();
338   AllowSyscall(__NR_munmap);
339   AllowSyscall(__NR_sched_yield);
340 
341   // https://github.com/llvm/llvm-project/blob/4bbc3290a25c0dc26007912a96e0f77b2092ee56/compiler-rt/lib/sanitizer_common/sanitizer_stack_store.cpp#L293
342   AddPolicyOnSyscall(__NR_mprotect,
343                      {
344                          ARG_32(2),
345                          BPF_STMT(BPF_AND | BPF_ALU | BPF_K,
346                                   ~uint32_t{PROT_READ | PROT_WRITE}),
347                          JEQ32(PROT_NONE, ALLOW),
348                      });
349 
350   AddPolicyOnSyscall(__NR_madvise, {
351                                        ARG_32(2),
352                                        JEQ32(MADV_DONTDUMP, ALLOW),
353                                        JEQ32(MADV_NOHUGEPAGE, ALLOW),
354                                    });
355   // Sanitizers read from /proc. For example:
356   // https://github.com/llvm/llvm-project/blob/634da7a1c61ee8c173e90a841eb1f4ea03caa20b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp#L1155
357   AddDirectoryIfNamespaced("/proc");
358   AllowOpen();
359   // Sanitizers need pid for reports. For example:
360   // https://github.com/llvm/llvm-project/blob/634da7a1c61ee8c173e90a841eb1f4ea03caa20b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp#L740
361   AllowGetPIDs();
362   // Sanitizers may try color output. For example:
363   // https://github.com/llvm/llvm-project/blob/87dd3d350c4ce0115b2cdf91d85ddd05ae2661aa/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp#L157
364   OverridableBlockSyscallWithErrno(__NR_ioctl, EPERM);
365   // https://github.com/llvm/llvm-project/blob/9aa39481d9eb718e872993791547053a3c1f16d5/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp#L150
366   // https://sourceware.org/git/?p=glibc.git;a=blob;f=nptl/pthread_getattr_np.c;h=de7edfa0928224eb8375e2fe894d6677570fbb3b;hb=HEAD#l188
367   AllowSyscall(__NR_sched_getaffinity);
368   // https://github.com/llvm/llvm-project/blob/02c2b472b510ff55679844c087b66e7837e13dc2/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp#L434
369 #ifdef __NR_readlink
370   OverridableBlockSyscallWithErrno(__NR_readlink, ENOENT);
371 #endif
372   OverridableBlockSyscallWithErrno(__NR_readlinkat, ENOENT);
373   if constexpr (sapi::sanitizers::IsASan()) {
374     AllowSyscall(__NR_sigaltstack);
375   }
376   if constexpr (sapi::sanitizers::IsTSan()) {
377     AllowSyscall(__NR_set_robust_list);
378   }
379   return *this;
380 }
381 
AllowLlvmCoverage()382 PolicyBuilder& PolicyBuilder::AllowLlvmCoverage() {
383   if (!sapi::IsCoverageRun()) {
384     return *this;
385   }
386   AllowStat();
387   AllowGetPIDs();
388   AllowOpen();
389   AllowRead();
390   AllowWrite();
391   AllowMkdir();
392   AllowSafeFcntl();
393   AllowSyscalls({
394       __NR_munmap, __NR_close, __NR_lseek,
395 #ifdef __NR__llseek
396       __NR__llseek,  // Newer glibc on PPC
397 #endif
398   });
399   AllowTcMalloc();
400   AddPolicyOnMmap([](bpf_labels& labels) -> std::vector<sock_filter> {
401     return {
402         ARG_32(2),  // prot
403         JNE32(PROT_READ | PROT_WRITE, JUMP(&labels, mmap_end)),
404         ARG_32(3),  // flags
405         JEQ32(MAP_SHARED, ALLOW),
406         LABEL(&labels, mmap_end),
407     };
408   });
409   AddDirectoryIfNamespaced(getenv("COVERAGE_DIR"), /*is_ro=*/false);
410   return *this;
411 }
412 
AllowLimitedMadvise()413 PolicyBuilder& PolicyBuilder::AllowLimitedMadvise() {
414   return AddPolicyOnSyscall(__NR_madvise, {
415                                               ARG_32(2),
416                                               JEQ32(MADV_DONTNEED, ALLOW),
417                                               JEQ32(MADV_REMOVE, ALLOW),
418                                               JEQ32(MADV_HUGEPAGE, ALLOW),
419                                               JEQ32(MADV_NOHUGEPAGE, ALLOW),
420                                           });
421 }
422 
AllowMmapWithoutExec()423 PolicyBuilder& PolicyBuilder::AllowMmapWithoutExec() {
424   return AddPolicyOnMmap({
425       ARG_32(2),
426       BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, PROT_EXEC, 1, 0),
427       ALLOW,
428   });
429 }
430 
AllowMmap()431 PolicyBuilder& PolicyBuilder::AllowMmap() {
432   return AllowSyscalls(kMmapSyscalls);
433 }
434 
AllowOpen()435 PolicyBuilder& PolicyBuilder::AllowOpen() {
436 #ifdef __NR_creat
437   AllowSyscall(__NR_creat);
438 #endif
439 #ifdef __NR_open
440   AllowSyscall(__NR_open);
441 #endif
442 #ifdef __NR_openat
443   AllowSyscall(__NR_openat);
444 #endif
445   return *this;
446 }
447 
AllowStat()448 PolicyBuilder& PolicyBuilder::AllowStat() {
449 #ifdef __NR_fstat
450   AllowSyscall(__NR_fstat);
451 #endif
452 #ifdef __NR_fstat64
453   AllowSyscall(__NR_fstat64);
454 #endif
455 #ifdef __NR_fstatat
456   AllowSyscall(__NR_fstatat);
457 #endif
458 #ifdef __NR_fstatat64
459   AllowSyscall(__NR_fstatat64);
460 #endif
461 #ifdef __NR_fstatfs
462   AllowSyscall(__NR_fstatfs);
463 #endif
464 #ifdef __NR_fstatfs64
465   AllowSyscall(__NR_fstatfs64);
466 #endif
467 #ifdef __NR_lstat
468   AllowSyscall(__NR_lstat);
469 #endif
470 #ifdef __NR_lstat64
471   AllowSyscall(__NR_lstat64);
472 #endif
473 #ifdef __NR_newfstatat
474   AllowSyscall(__NR_newfstatat);
475 #endif
476 #ifdef __NR_oldfstat
477   AllowSyscall(__NR_oldfstat);
478 #endif
479 #ifdef __NR_oldlstat
480   AllowSyscall(__NR_oldlstat);
481 #endif
482 #ifdef __NR_oldstat
483   AllowSyscall(__NR_oldstat);
484 #endif
485 #ifdef __NR_stat
486   AllowSyscall(__NR_stat);
487 #endif
488 #ifdef __NR_stat64
489   AllowSyscall(__NR_stat64);
490 #endif
491 #ifdef __NR_statfs
492   AllowSyscall(__NR_statfs);
493 #endif
494 #ifdef __NR_statfs64
495   AllowSyscall(__NR_statfs64);
496 #endif
497   return *this;
498 }
499 
AllowAccess()500 PolicyBuilder& PolicyBuilder::AllowAccess() {
501 #ifdef __NR_access
502   AllowSyscall(__NR_access);
503 #endif
504 #ifdef __NR_faccessat
505   AllowSyscall(__NR_faccessat);
506 #endif
507 #ifdef __NR_faccessat2
508   AllowSyscall(__NR_faccessat2);
509 #endif
510   return *this;
511 }
512 
AllowDup()513 PolicyBuilder& PolicyBuilder::AllowDup() {
514   AllowSyscall(__NR_dup);
515 #ifdef __NR_dup2
516   AllowSyscall(__NR_dup2);
517 #endif
518   AllowSyscall(__NR_dup3);
519   return *this;
520 }
521 
AllowPipe()522 PolicyBuilder& PolicyBuilder::AllowPipe() {
523 #ifdef __NR_pipe
524   AllowSyscall(__NR_pipe);
525 #endif
526   AllowSyscall(__NR_pipe2);
527   return *this;
528 }
529 
AllowChmod()530 PolicyBuilder& PolicyBuilder::AllowChmod() {
531 #ifdef __NR_chmod
532   AllowSyscall(__NR_chmod);
533 #endif
534   AllowSyscall(__NR_fchmod);
535   AllowSyscall(__NR_fchmodat);
536   return *this;
537 }
538 
AllowChown()539 PolicyBuilder& PolicyBuilder::AllowChown() {
540 #ifdef __NR_chown
541   AllowSyscall(__NR_chown);
542 #endif
543 #ifdef __NR_lchown
544   AllowSyscall(__NR_lchown);
545 #endif
546   AllowSyscall(__NR_fchown);
547   AllowSyscall(__NR_fchownat);
548   return *this;
549 }
550 
AllowRead()551 PolicyBuilder& PolicyBuilder::AllowRead() {
552   return AllowSyscalls({
553       __NR_read,
554       __NR_readv,
555       __NR_preadv,
556       __NR_pread64,
557   });
558 }
559 
AllowWrite()560 PolicyBuilder& PolicyBuilder::AllowWrite() {
561   return AllowSyscalls({
562       __NR_write,
563       __NR_writev,
564       __NR_pwritev,
565       __NR_pwrite64,
566   });
567 }
568 
AllowReaddir()569 PolicyBuilder& PolicyBuilder::AllowReaddir() {
570   return AllowSyscalls({
571 #ifdef __NR_getdents
572       __NR_getdents,
573 #endif
574 #ifdef __NR_getdents64
575       __NR_getdents64,
576 #endif
577   });
578 }
579 
AllowReadlink()580 PolicyBuilder& PolicyBuilder::AllowReadlink() {
581   return AllowSyscalls({
582 #ifdef __NR_readlink
583       __NR_readlink,
584 #endif
585 #ifdef __NR_readlinkat
586       __NR_readlinkat,
587 #endif
588   });
589 }
590 
AllowLink()591 PolicyBuilder& PolicyBuilder::AllowLink() {
592   return AllowSyscalls({
593 #ifdef __NR_link
594       __NR_link,
595 #endif
596 #ifdef __NR_linkat
597       __NR_linkat,
598 #endif
599   });
600 }
601 
AllowSymlink()602 PolicyBuilder& PolicyBuilder::AllowSymlink() {
603   return AllowSyscalls({
604 #ifdef __NR_symlink
605       __NR_symlink,
606 #endif
607 #ifdef __NR_symlinkat
608       __NR_symlinkat,
609 #endif
610   });
611 }
612 
AllowMkdir()613 PolicyBuilder& PolicyBuilder::AllowMkdir() {
614   return AllowSyscalls({
615 #ifdef __NR_mkdir
616       __NR_mkdir,
617 #endif
618 #ifdef __NR_mkdirat
619       __NR_mkdirat,
620 #endif
621   });
622 }
623 
AllowUtime()624 PolicyBuilder& PolicyBuilder::AllowUtime() {
625   return AllowSyscalls({
626 #ifdef __NR_futimens
627       __NR_futimens,
628 #endif
629 #ifdef __NR_utime
630       __NR_utime,
631 #endif
632 #ifdef __NR_utimes
633       __NR_utimes,
634 #endif
635 #ifdef __NR_utimensat
636       __NR_utimensat,
637 #endif
638   });
639 }
640 
AllowSafeFcntl()641 PolicyBuilder& PolicyBuilder::AllowSafeFcntl() {
642   return AddPolicyOnSyscalls({__NR_fcntl,
643 #ifdef __NR_fcntl64
644                               __NR_fcntl64
645 #endif
646                              },
647                              {
648                                  ARG_32(1),
649                                  JEQ32(F_GETFD, ALLOW),
650                                  JEQ32(F_SETFD, ALLOW),
651                                  JEQ32(F_GETFL, ALLOW),
652                                  JEQ32(F_SETFL, ALLOW),
653                                  JEQ32(F_GETLK, ALLOW),
654                                  JEQ32(F_SETLK, ALLOW),
655                                  JEQ32(F_SETLKW, ALLOW),
656                                  JEQ32(F_DUPFD, ALLOW),
657                                  JEQ32(F_DUPFD_CLOEXEC, ALLOW),
658                              });
659 }
660 
AllowFork()661 PolicyBuilder& PolicyBuilder::AllowFork() {
662   return AllowSyscalls({
663 #ifdef __NR_fork
664       __NR_fork,
665 #endif
666 #ifdef __NR_vfork
667       __NR_vfork,
668 #endif
669       __NR_clone});
670 }
671 
AllowWait()672 PolicyBuilder& PolicyBuilder::AllowWait() {
673   return AllowSyscalls({
674 #ifdef __NR_waitpid
675       __NR_waitpid,
676 #endif
677       __NR_wait4});
678 }
679 
AllowAlarm()680 PolicyBuilder& PolicyBuilder::AllowAlarm() {
681   return AllowSyscalls({
682 #ifdef __NR_alarm
683       __NR_alarm,
684 #endif
685       __NR_setitimer});
686 }
687 
AllowHandleSignals()688 PolicyBuilder& PolicyBuilder::AllowHandleSignals() {
689   return AllowSyscalls({
690       __NR_rt_sigaction,
691       __NR_rt_sigreturn,
692       __NR_rt_sigprocmask,
693 #ifdef __NR_signal
694       __NR_signal,
695 #endif
696 #ifdef __NR_sigaction
697       __NR_sigaction,
698 #endif
699 #ifdef __NR_sigreturn
700       __NR_sigreturn,
701 #endif
702 #ifdef __NR_sigprocmask
703       __NR_sigprocmask,
704 #endif
705 #ifdef __NR_sigaltstack
706       __NR_sigaltstack,
707 #endif
708   });
709 }
710 
AllowTCGETS()711 PolicyBuilder& PolicyBuilder::AllowTCGETS() {
712   return AddPolicyOnSyscall(__NR_ioctl, {
713                                             ARG_32(1),
714                                             JEQ32(TCGETS, ALLOW),
715                                         });
716 }
717 
AllowTime()718 PolicyBuilder& PolicyBuilder::AllowTime() {
719   return AllowSyscalls({
720 #ifdef __NR_time
721       __NR_time,
722 #endif
723       __NR_gettimeofday, __NR_clock_gettime});
724 }
725 
AllowSleep()726 PolicyBuilder& PolicyBuilder::AllowSleep() {
727   return AllowSyscalls({
728       __NR_clock_nanosleep,
729       __NR_nanosleep,
730   });
731 }
732 
AllowGetIDs()733 PolicyBuilder& PolicyBuilder::AllowGetIDs() {
734   return AllowSyscalls({
735       __NR_getuid,
736       __NR_geteuid,
737       __NR_getresuid,
738       __NR_getgid,
739       __NR_getegid,
740       __NR_getresgid,
741 #ifdef __NR_getuid32
742       __NR_getuid32,
743       __NR_geteuid32,
744       __NR_getresuid32,
745       __NR_getgid32,
746       __NR_getegid32,
747       __NR_getresgid32,
748 #endif
749       __NR_getgroups,
750   });
751 }
752 
AllowRestartableSequences(CpuFenceMode cpu_fence_mode)753 PolicyBuilder& PolicyBuilder::AllowRestartableSequences(
754     CpuFenceMode cpu_fence_mode) {
755 #ifdef __NR_rseq
756   AllowSyscall(__NR_rseq);
757 #endif
758   AddPolicyOnMmap([](bpf_labels& labels) -> std::vector<sock_filter> {
759     return {
760         ARG_32(2),  // prot
761         JNE32(PROT_READ | PROT_WRITE, JUMP(&labels, mmap_end)),
762 
763         ARG_32(3),  // flags
764         JNE32(MAP_PRIVATE | MAP_ANONYMOUS, JUMP(&labels, mmap_end)),
765 
766         ALLOW,
767         LABEL(&labels, mmap_end),
768     };
769   });
770   AllowSyscall(__NR_getcpu);
771   AllowSyscall(__NR_membarrier);
772   AllowFutexOp(FUTEX_WAIT);
773   AllowFutexOp(FUTEX_WAKE);
774   AllowRead();
775   AllowOpen();
776   AllowPoll();
777   AllowSyscall(__NR_close);
778   AddPolicyOnSyscall(__NR_rt_sigprocmask, {
779                                               ARG_32(0),
780                                               JEQ32(SIG_SETMASK, ALLOW),
781                                           });
782   AllowPrctlSetVma();
783   if (cpu_fence_mode == kAllowSlowFences) {
784     AllowSyscall(__NR_sched_getaffinity);
785     AllowSyscall(__NR_sched_setaffinity);
786   }
787   AddFileIfNamespaced("/proc/cpuinfo");
788   AddFileIfNamespaced("/proc/stat");
789   AddDirectoryIfNamespaced("/sys/devices/system/cpu");
790   if (cpu_fence_mode == kAllowSlowFences) {
791     AddFileIfNamespaced("/proc/self/cpuset");
792   }
793   return *this;
794 }
795 
AllowGetPIDs()796 PolicyBuilder& PolicyBuilder::AllowGetPIDs() {
797   return AllowSyscalls({
798       __NR_getpid,
799       __NR_getppid,
800       __NR_gettid,
801   });
802 }
803 
AllowGetPGIDs()804 PolicyBuilder& PolicyBuilder::AllowGetPGIDs() {
805   return AllowSyscalls({
806       __NR_getpgid,
807 #ifdef __NR_getpgrp
808       __NR_getpgrp,
809 #endif
810   });
811 }
812 
AllowGetRlimit()813 PolicyBuilder& PolicyBuilder::AllowGetRlimit() {
814 #ifdef __NR_prlimit64
815   AddPolicyOnSyscall(__NR_prlimit64, {ARG(2), JEQ64(0, 0, ALLOW)});
816 #endif
817   return AllowSyscalls({
818 #ifdef __NR_getrlimit
819       __NR_getrlimit,
820 #endif
821 #ifdef __NR_ugetrlimit
822       __NR_ugetrlimit,
823 #endif
824   });
825 }
826 
AllowSetRlimit()827 PolicyBuilder& PolicyBuilder::AllowSetRlimit() {
828   return AllowSyscalls({
829 #ifdef __NR_prlimit64
830       __NR_prlimit64,
831 #endif
832 #ifdef __NR_setrlimit
833       __NR_setrlimit,
834 #endif
835 #ifdef __NR_usetrlimit
836       __NR_usetrlimit,
837 #endif
838   });
839 }
840 
AllowGetRandom()841 PolicyBuilder& PolicyBuilder::AllowGetRandom() {
842   return AddPolicyOnSyscall(__NR_getrandom, {
843                                                 ARG_32(2),
844                                                 JEQ32(0, ALLOW),
845                                                 JEQ32(GRND_NONBLOCK, ALLOW),
846                                             });
847 }
848 
AllowWipeOnFork()849 PolicyBuilder& PolicyBuilder::AllowWipeOnFork() {
850   // System headers may not be recent enough to include MADV_WIPEONFORK.
851   static constexpr uint32_t kMadv_WipeOnFork = 18;
852   // The -1 value is used by code to probe that the kernel returns -EINVAL for
853   // unknown values because some environments, like qemu, ignore madvise
854   // completely, but code needs to know whether WIPEONFORK took effect.
855   return AddPolicyOnSyscall(__NR_madvise,
856                             {
857                                 ARG_32(2),
858                                 JEQ32(kMadv_WipeOnFork, ALLOW),
859                                 JEQ32(static_cast<uint32_t>(-1), ALLOW),
860                             });
861 }
862 
AllowLogForwarding()863 PolicyBuilder& PolicyBuilder::AllowLogForwarding() {
864   AllowWrite();
865   AllowSystemMalloc();
866   AllowTcMalloc();
867 
868   // From comms
869   AllowGetPIDs();
870   AllowSyscalls({// from logging code
871                  __NR_clock_gettime,
872                  // From comms
873                  __NR_gettid, __NR_close});
874 
875   // For generating stacktraces in logging (e.g. `LOG(FATAL)`)
876   AddPolicyOnSyscall(__NR_rt_sigprocmask, {
877                                               ARG_32(0),
878                                               JEQ32(SIG_BLOCK, ALLOW),
879                                           });
880   AllowGetRlimit();
881 
882   // For LOG(FATAL)
883   return AddPolicyOnSyscall(__NR_kill,
884                             [](bpf_labels& labels) -> std::vector<sock_filter> {
885                               return {
886                                   ARG_32(0),
887                                   JNE32(0, JUMP(&labels, pid_not_null)),
888                                   ARG_32(1),
889                                   JEQ32(SIGABRT, ALLOW),
890                                   LABEL(&labels, pid_not_null),
891                               };
892                             });
893 }
894 
AllowUnlink()895 PolicyBuilder& PolicyBuilder::AllowUnlink() {
896   AllowSyscalls({
897 #ifdef __NR_rmdir
898       __NR_rmdir,
899 #endif
900 #ifdef __NR_unlink
901       __NR_unlink,
902 #endif
903       __NR_unlinkat,
904   });
905   return *this;
906 }
907 
AllowPoll()908 PolicyBuilder& PolicyBuilder::AllowPoll() {
909   AllowSyscalls({
910 #ifdef __NR_poll
911       __NR_poll,
912 #endif
913       __NR_ppoll,
914   });
915   return *this;
916 }
917 
AllowRename()918 PolicyBuilder& PolicyBuilder::AllowRename() {
919   AllowSyscalls({
920 #ifdef __NR_rename
921       __NR_rename,
922 #endif
923       __NR_renameat,
924 #ifdef __NR_renameat2
925       __NR_renameat2,
926 #endif
927   });
928   return *this;
929 }
930 
AllowEventFd()931 PolicyBuilder& PolicyBuilder::AllowEventFd() {
932   AllowSyscalls({
933 #ifdef __NR_eventfd
934       __NR_eventfd,
935 #endif
936       __NR_eventfd2,
937   });
938   return *this;
939 }
940 
AllowPrctlSetName()941 PolicyBuilder& PolicyBuilder::AllowPrctlSetName() {
942   AddPolicyOnSyscall(__NR_prctl, {ARG_32(0), JEQ32(PR_SET_NAME, ALLOW)});
943   return *this;
944 }
945 
AllowPrctlSetVma()946 PolicyBuilder& PolicyBuilder::AllowPrctlSetVma() {
947   AddPolicyOnSyscall(__NR_prctl,
948                      [](bpf_labels& labels) -> std::vector<sock_filter> {
949                        return {
950                            ARG_32(0),
951                            JNE32(PR_SET_VMA, JUMP(&labels, prctlsetvma_end)),
952                            ARG_32(1),
953                            JEQ32(PR_SET_VMA_ANON_NAME, ALLOW),
954                            LABEL(&labels, prctlsetvma_end),
955                        };
956                      });
957   return *this;
958 }
959 
AllowFutexOp(int op)960 PolicyBuilder& PolicyBuilder::AllowFutexOp(int op) {
961   return AddPolicyOnSyscall(
962       __NR_futex, {
963                       ARG_32(1),
964                       // a <- a & FUTEX_CMD_MASK
965                       BPF_STMT(BPF_ALU + BPF_AND + BPF_K,
966                                static_cast<uint32_t>(FUTEX_CMD_MASK)),
967                       JEQ32(static_cast<uint32_t>(op) & FUTEX_CMD_MASK, ALLOW),
968                   });
969 }
970 
AllowStaticStartup()971 PolicyBuilder& PolicyBuilder::AllowStaticStartup() {
972   AllowGetRlimit();
973   AllowSyscalls({
974     // These syscalls take a pointer, so no restriction.
975     __NR_uname, __NR_brk, __NR_set_tid_address,
976 
977 #if defined(__ARM_NR_set_tls)
978         // libc sets the TLS during startup
979         __ARM_NR_set_tls,
980 #endif
981 
982         // This syscall takes a pointer and a length.
983         // We could restrict length, but it might change, so not worth it.
984         __NR_set_robust_list,
985   });
986 
987   AllowFutexOp(FUTEX_WAIT_BITSET);
988 
989   AddPolicyOnSyscall(__NR_rt_sigaction,
990                      {
991                          ARG_32(0),
992                          // This is real-time signals used internally by libc.
993                          JEQ32(__SIGRTMIN + 0, ALLOW),
994                          JEQ32(__SIGRTMIN + 1, ALLOW),
995                      });
996 
997   AllowSyscall(__NR_rt_sigprocmask);
998 
999 #ifdef SAPI_X86_64
1000   // The second argument is a pointer.
1001   AddPolicyOnSyscall(__NR_arch_prctl, {
1002                                           ARG_32(0),
1003                                           JEQ32(ARCH_SET_FS, ALLOW),
1004                                       });
1005 #endif
1006 
1007   if constexpr (sapi::host_cpu::IsArm64()) {
1008     OverridableBlockSyscallWithErrno(__NR_readlinkat, ENOENT);
1009   }
1010 #ifdef __NR_readlink
1011   OverridableBlockSyscallWithErrno(__NR_readlink, ENOENT);
1012 #endif
1013 
1014   AllowGetRlimit();
1015   AddPolicyOnSyscall(__NR_mprotect, {
1016                                         ARG_32(2),
1017                                         JEQ32(PROT_READ, ALLOW),
1018                                     });
1019 
1020   OverridableBlockSyscallWithErrno(__NR_sigaltstack, ENOSYS);
1021 
1022   return *this;
1023 }
1024 
AllowDynamicStartup()1025 PolicyBuilder& PolicyBuilder::AllowDynamicStartup() {
1026 #ifdef __ANDROID__
1027   AllowSafeFcntl();
1028   AllowGetIDs();
1029   AllowGetPIDs();
1030   AllowGetRandom();
1031   AllowSyscalls({
1032 #ifdef __NR_fstatfs
1033       __NR_fstatfs,
1034 #endif
1035 #ifdef __NR_fstatfs64
1036       __NR_fstatfs64,
1037 #endif
1038       __NR_readlinkat,
1039       __NR_sched_getaffinity,
1040       __NR_sched_getscheduler,
1041   });
1042   AllowHandleSignals();
1043   AllowFutexOp(FUTEX_WAKE_PRIVATE);
1044   AddPolicyOnSyscall(__NR_prctl,
1045                      [](bpf_labels& labels) -> std::vector<sock_filter> {
1046                        return {
1047                            ARG_32(0),  // option
1048                            JEQ32(PR_GET_DUMPABLE, ALLOW),
1049                            JNE32(PR_SET_VMA, JUMP(&labels, prctl_end)),
1050 
1051                            ARG_32(1),  // arg2
1052                            JEQ32(PR_SET_VMA_ANON_NAME, ALLOW),
1053 
1054                            LABEL(&labels, prctl_end),
1055                        };
1056                      });
1057   AddPolicyOnSyscall(__NR_mremap,
1058                      {
1059                          ARG_32(3),
1060                          JEQ32(MREMAP_MAYMOVE | MREMAP_FIXED, ALLOW),
1061                      });
1062   AddPolicyOnMmap([](bpf_labels& labels) -> std::vector<sock_filter> {
1063     return {
1064         ARG_32(2),  // prot
1065         JEQ32(PROT_NONE, JUMP(&labels, prot_none)),
1066         JEQ32(PROT_READ, JUMP(&labels, prot_read)),
1067         JEQ32(PROT_READ | PROT_WRITE, JUMP(&labels, prot_RW_or_RX)),
1068         JEQ32(PROT_READ | PROT_EXEC, JUMP(&labels, prot_RW_or_RX)),
1069 
1070         // PROT_NONE
1071         LABEL(&labels, prot_none),
1072         ARG_32(3),  // flags
1073         JEQ32(MAP_PRIVATE | MAP_ANONYMOUS, ALLOW),
1074         JEQ32(MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, ALLOW),
1075         JUMP(&labels, mmap_end),
1076 
1077         // PROT_READ
1078         LABEL(&labels, prot_read),
1079         ARG_32(3),  // flags
1080         JEQ32(MAP_SHARED, ALLOW),
1081         JEQ32(MAP_PRIVATE, ALLOW),
1082         JEQ32(MAP_PRIVATE | MAP_FIXED, ALLOW),
1083         JUMP(&labels, mmap_end),
1084 
1085         // PROT_READ | PROT_WRITE
1086         // PROT_READ | PROT_EXEC
1087         LABEL(&labels, prot_RW_or_RX),
1088         ARG_32(3),  // flags
1089         JEQ32(MAP_PRIVATE | MAP_FIXED, ALLOW),
1090 
1091         LABEL(&labels, mmap_end),
1092     };
1093   });
1094 #endif
1095 
1096   AllowAccess();
1097   AllowOpen();
1098   AllowRead();
1099   AllowStat();
1100   AllowSyscalls({__NR_lseek,
1101 #ifdef __NR__llseek
1102                  __NR__llseek,  // Newer glibc on PPC
1103 #endif
1104                  __NR_close, __NR_munmap});
1105   AddPolicyOnSyscall(__NR_mprotect, {
1106                                         ARG_32(2),
1107                                         JEQ32(PROT_READ, ALLOW),
1108                                         JEQ32(PROT_NONE, ALLOW),
1109                                         JEQ32(PROT_READ | PROT_WRITE, ALLOW),
1110                                         JEQ32(PROT_READ | PROT_EXEC, ALLOW),
1111                                     });
1112   AllowStaticStartup();
1113 
1114   return AddPolicyOnMmap([](bpf_labels& labels) -> std::vector<sock_filter> {
1115     return {
1116         ARG_32(2),  // prot
1117         JEQ32(PROT_READ | PROT_EXEC, JUMP(&labels, prot_exec)),
1118         JEQ32(PROT_READ | PROT_WRITE, JUMP(&labels, prot_read_write)),
1119         JNE32(PROT_READ, JUMP(&labels, mmap_end)),
1120 
1121         // PROT_READ
1122         ARG_32(3),  // flags
1123         JEQ32(MAP_PRIVATE, ALLOW),
1124         JUMP(&labels, mmap_end),
1125 
1126         // PROT_READ | PROT_WRITE
1127         LABEL(&labels, prot_read_write),
1128         ARG_32(3),  // flags
1129         JEQ32(MAP_FILE | MAP_PRIVATE | MAP_FIXED | MAP_DENYWRITE, ALLOW),
1130         JEQ32(MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, ALLOW),
1131         JEQ32(MAP_ANONYMOUS | MAP_PRIVATE, ALLOW),
1132         JUMP(&labels, mmap_end),
1133 
1134         // PROT_READ | PROT_EXEC
1135         LABEL(&labels, prot_exec),
1136         ARG_32(3),  // flags
1137         JEQ32(MAP_FILE | MAP_PRIVATE | MAP_DENYWRITE, ALLOW),
1138 
1139         LABEL(&labels, mmap_end),
1140     };
1141   });
1142 }
1143 
AddPolicyOnSyscall(uint32_t num,absl::Span<const sock_filter> policy)1144 PolicyBuilder& PolicyBuilder::AddPolicyOnSyscall(
1145     uint32_t num, absl::Span<const sock_filter> policy) {
1146   return AddPolicyOnSyscalls({num}, policy);
1147 }
1148 
AddPolicyOnSyscall(uint32_t num,BpfFunc f)1149 PolicyBuilder& PolicyBuilder::AddPolicyOnSyscall(uint32_t num, BpfFunc f) {
1150   return AddPolicyOnSyscalls({num}, f);
1151 }
1152 
AddPolicyOnSyscalls(absl::Span<const uint32_t> nums,absl::Span<const sock_filter> policy)1153 PolicyBuilder& PolicyBuilder::AddPolicyOnSyscalls(
1154     absl::Span<const uint32_t> nums, absl::Span<const sock_filter> policy) {
1155   if (nums.empty()) {
1156     SetError(absl::InvalidArgumentError(
1157         "Cannot add a policy for empty list of syscalls"));
1158     return *this;
1159   }
1160   std::deque<sock_filter> out;
1161   // Insert and verify the policy.
1162   out.insert(out.end(), policy.begin(), policy.end());
1163   for (size_t i = 0; i < out.size(); ++i) {
1164     sock_filter& filter = out[i];
1165     const size_t max_jump = out.size() - i - 1;
1166     if (!CheckBpfBounds(filter, max_jump)) {
1167       SetError(absl::InvalidArgumentError("bpf jump out of bounds"));
1168       return *this;
1169     }
1170     // Syscall arch is expected as TRACE value
1171     if (filter.code == (BPF_RET | BPF_K) &&
1172         (filter.k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRACE &&
1173         (filter.k & SECCOMP_RET_DATA) != Syscall::GetHostArch()) {
1174       LOG(WARNING) << "SANDBOX2_TRACE should be used in policy instead of "
1175                       "TRACE(value)";
1176       filter = SANDBOX2_TRACE;
1177     }
1178   }
1179   // Pre-/Postcondition: Syscall number loaded into A register
1180   out.push_back(LOAD_SYSCALL_NR);
1181   if (out.size() > std::numeric_limits<uint32_t>::max()) {
1182     SetError(absl::InvalidArgumentError("syscall policy is too long"));
1183     return *this;
1184   }
1185   // Create jumps for each syscall.
1186   size_t do_policy_loc = out.size();
1187   // Iterate in reverse order and prepend instruction, so that jumps can be
1188   // calculated easily.
1189   constexpr size_t kMaxShortJump = 255;
1190   bool last = true;
1191   for (auto it = std::rbegin(nums); it != std::rend(nums); ++it) {
1192     if (*it == __NR_bpf || *it == __NR_ptrace) {
1193       SetError(absl::InvalidArgumentError(
1194           "cannot add policy for bpf/ptrace syscall"));
1195       return *this;
1196     }
1197     // If syscall is not matched try with the next one.
1198     uint8_t jf = 0;
1199     // If last syscall on the list does not match skip the policy by jumping
1200     // over it.
1201     if (last) {
1202       if (out.size() > kMaxShortJump) {
1203         out.push_front(
1204             BPF_STMT(BPF_JMP + BPF_JA, static_cast<uint32_t>(out.size())));
1205       } else {
1206         jf = out.size();
1207       }
1208       last = false;
1209     }
1210     // Add a helper absolute jump if needed - the policy/last helper jump is
1211     // out of reach of a short jump.
1212     if ((out.size() - do_policy_loc) > kMaxShortJump) {
1213       out.push_front(BPF_STMT(
1214           BPF_JMP + BPF_JA, static_cast<uint32_t>(out.size() - policy.size())));
1215       do_policy_loc = out.size();
1216       ++jf;
1217     }
1218     uint8_t jt = out.size() - do_policy_loc;
1219     out.push_front(BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, *it, jt, jf));
1220   }
1221   user_policy_.insert(user_policy_.end(), out.begin(), out.end());
1222   return *this;
1223 }
1224 
AddPolicyOnSyscalls(absl::Span<const uint32_t> nums,BpfFunc f)1225 PolicyBuilder& PolicyBuilder::AddPolicyOnSyscalls(
1226     absl::Span<const uint32_t> nums, BpfFunc f) {
1227   return AddPolicyOnSyscalls(nums, ResolveBpfFunc(f));
1228 }
1229 
AddPolicyOnMmap(absl::Span<const sock_filter> policy)1230 PolicyBuilder& PolicyBuilder::AddPolicyOnMmap(
1231     absl::Span<const sock_filter> policy) {
1232   return AddPolicyOnSyscalls(kMmapSyscalls, policy);
1233 }
1234 
AddPolicyOnMmap(BpfFunc f)1235 PolicyBuilder& PolicyBuilder::AddPolicyOnMmap(BpfFunc f) {
1236   return AddPolicyOnSyscalls(kMmapSyscalls, f);
1237 }
1238 
DangerDefaultAllowAll()1239 PolicyBuilder& PolicyBuilder::DangerDefaultAllowAll() {
1240   return DefaultAction(AllowAllSyscalls());
1241 }
1242 
DefaultAction(AllowAllSyscalls)1243 PolicyBuilder& PolicyBuilder::DefaultAction(AllowAllSyscalls) {
1244   default_action_ = ALLOW;
1245   return *this;
1246 }
1247 
DefaultAction(TraceAllSyscalls)1248 PolicyBuilder& PolicyBuilder::DefaultAction(TraceAllSyscalls) {
1249   default_action_ = SANDBOX2_TRACE;
1250   return *this;
1251 }
1252 
ValidateAbsolutePath(absl::string_view path)1253 absl::StatusOr<std::string> PolicyBuilder::ValidateAbsolutePath(
1254     absl::string_view path) {
1255   if (!file::IsAbsolutePath(path)) {
1256     return absl::InvalidArgumentError(
1257         absl::StrCat("Path is not absolute: '", path, "'"));
1258   }
1259   return ValidatePath(path);
1260 }
1261 
ValidatePath(absl::string_view path)1262 absl::StatusOr<std::string> PolicyBuilder::ValidatePath(
1263     absl::string_view path) {
1264   std::string fixed_path = file::CleanPath(path);
1265   if (fixed_path != path) {
1266     return absl::InvalidArgumentError(absl::StrCat(
1267         "Path was not normalized. '", path, "' != '", fixed_path, "'"));
1268   }
1269   return fixed_path;
1270 }
1271 
ResolveBpfFunc(BpfFunc f)1272 std::vector<sock_filter> PolicyBuilder::ResolveBpfFunc(BpfFunc f) {
1273   bpf_labels l = {0};
1274 
1275   std::vector<sock_filter> policy = f(l);
1276   if (bpf_resolve_jumps(&l, policy.data(), policy.size()) != 0) {
1277     SetError(absl::InternalError("Cannot resolve bpf jumps"));
1278   }
1279 
1280   return policy;
1281 }
1282 
TryBuild()1283 absl::StatusOr<std::unique_ptr<Policy>> PolicyBuilder::TryBuild() {
1284   if (!last_status_.ok()) {
1285     return last_status_;
1286   }
1287 
1288   if (user_policy_.size() > kMaxUserPolicyLength) {
1289     return absl::FailedPreconditionError(
1290         absl::StrCat("User syscall policy is to long (", user_policy_.size(),
1291                      " > ", kMaxUserPolicyLength, ")."));
1292   }
1293 
1294   // Using `new` to access a non-public constructor.
1295   auto output = absl::WrapUnique(new Policy());
1296 
1297   if (already_built_) {
1298     return absl::FailedPreconditionError("Can only build policy once.");
1299   }
1300 
1301   if (use_namespaces_) {
1302     if (allow_unrestricted_networking_ && hostname_ != kDefaultHostname) {
1303       return absl::FailedPreconditionError(
1304           "Cannot set hostname without network namespaces.");
1305     }
1306     output->namespace_ =
1307         Namespace(allow_unrestricted_networking_, std::move(mounts_), hostname_,
1308                   allow_mount_propagation_);
1309   }
1310 
1311   output->collect_stacktrace_on_signal_ = collect_stacktrace_on_signal_;
1312   output->collect_stacktrace_on_violation_ = collect_stacktrace_on_violation_;
1313   output->collect_stacktrace_on_timeout_ = collect_stacktrace_on_timeout_;
1314   output->collect_stacktrace_on_kill_ = collect_stacktrace_on_kill_;
1315   output->collect_stacktrace_on_exit_ = collect_stacktrace_on_exit_;
1316   output->user_policy_ = std::move(user_policy_);
1317   if (default_action_) {
1318     output->user_policy_.push_back(*default_action_);
1319   }
1320   output->user_policy_.insert(output->user_policy_.end(),
1321                               overridable_policy_.begin(),
1322                               overridable_policy_.end());
1323   output->user_policy_handles_bpf_ = user_policy_handles_bpf_;
1324   output->user_policy_handles_ptrace_ = user_policy_handles_ptrace_;
1325 
1326   PolicyBuilderDescription pb_description;
1327 
1328   StoreDescription(&pb_description);
1329   output->policy_builder_description_ = pb_description;
1330   output->allowed_hosts_ = std::move(allowed_hosts_);
1331   already_built_ = true;
1332   return std::move(output);
1333 }
1334 
AddFile(absl::string_view path,bool is_ro)1335 PolicyBuilder& PolicyBuilder::AddFile(absl::string_view path, bool is_ro) {
1336   return AddFileAt(path, path, is_ro);
1337 }
1338 
AddFileAt(absl::string_view outside,absl::string_view inside,bool is_ro)1339 PolicyBuilder& PolicyBuilder::AddFileAt(absl::string_view outside,
1340                                         absl::string_view inside, bool is_ro) {
1341   EnableNamespaces();  // NOLINT(clang-diagnostic-deprecated-declarations)
1342   return AddFileAtIfNamespaced(outside, inside, is_ro);
1343 }
1344 
AddFileIfNamespaced(absl::string_view path,bool is_ro)1345 PolicyBuilder& PolicyBuilder::AddFileIfNamespaced(absl::string_view path,
1346                                                   bool is_ro) {
1347   return AddFileAtIfNamespaced(path, path, is_ro);
1348 }
1349 
AddFileAtIfNamespaced(absl::string_view outside,absl::string_view inside,bool is_ro)1350 PolicyBuilder& PolicyBuilder::AddFileAtIfNamespaced(absl::string_view outside,
1351                                                     absl::string_view inside,
1352                                                     bool is_ro) {
1353   auto valid_outside = ValidateAbsolutePath(outside);
1354   if (!valid_outside.ok()) {
1355     SetError(valid_outside.status());
1356     return *this;
1357   }
1358 
1359   if (absl::StartsWith(*valid_outside, "/proc/self") &&
1360       *valid_outside != "/proc/self/cpuset") {
1361     SetError(absl::InvalidArgumentError(
1362         absl::StrCat("Cannot add /proc/self mounts, you need to mount the "
1363                      "whole /proc instead. You tried to mount ",
1364                      outside)));
1365     return *this;
1366   }
1367 
1368   if (!is_ro && IsOnReadOnlyDev(*valid_outside)) {
1369     SetError(absl::FailedPreconditionError(
1370         absl::StrCat("Cannot add ", outside,
1371                      " as read-write as it's on a read-only device")));
1372     return *this;
1373   }
1374 
1375   if (auto status = mounts_.AddFileAt(*valid_outside, inside, is_ro);
1376       !status.ok()) {
1377     SetError(
1378         absl::InternalError(absl::StrCat("Could not add file ", outside, " => ",
1379                                          inside, ": ", status.message())));
1380   }
1381   return *this;
1382 }
1383 
AddLibrariesForBinary(absl::string_view path,absl::string_view ld_library_path)1384 PolicyBuilder& PolicyBuilder::AddLibrariesForBinary(
1385     absl::string_view path, absl::string_view ld_library_path) {
1386   EnableNamespaces();  // NOLINT(clang-diagnostic-deprecated-declarations)
1387 
1388   auto valid_path = ValidatePath(path);
1389   if (!valid_path.ok()) {
1390     SetError(valid_path.status());
1391     return *this;
1392   }
1393 
1394   if (auto status = mounts_.AddMappingsForBinary(*valid_path, ld_library_path);
1395       !status.ok()) {
1396     SetError(absl::InternalError(absl::StrCat(
1397         "Could not add libraries for ", *valid_path, ": ", status.message())));
1398   }
1399   return *this;
1400 }
1401 
AddLibrariesForBinary(int fd,absl::string_view ld_library_path)1402 PolicyBuilder& PolicyBuilder::AddLibrariesForBinary(
1403     int fd, absl::string_view ld_library_path) {
1404   return AddLibrariesForBinary(absl::StrCat("/proc/self/fd/", fd),
1405                                ld_library_path);
1406 }
1407 
AddDirectory(absl::string_view path,bool is_ro)1408 PolicyBuilder& PolicyBuilder::AddDirectory(absl::string_view path, bool is_ro) {
1409   return AddDirectoryAt(path, path, is_ro);
1410 }
1411 
AddDirectoryAt(absl::string_view outside,absl::string_view inside,bool is_ro)1412 PolicyBuilder& PolicyBuilder::AddDirectoryAt(absl::string_view outside,
1413                                              absl::string_view inside,
1414                                              bool is_ro) {
1415   EnableNamespaces();  // NOLINT(clang-diagnostic-deprecated-declarations)
1416   return AddDirectoryAtIfNamespaced(outside, inside, is_ro);
1417 }
1418 
AddDirectoryIfNamespaced(absl::string_view path,bool is_ro)1419 PolicyBuilder& PolicyBuilder::AddDirectoryIfNamespaced(absl::string_view path,
1420                                                        bool is_ro) {
1421   return AddDirectoryAtIfNamespaced(path, path, is_ro);
1422 }
1423 
AddDirectoryAtIfNamespaced(absl::string_view outside,absl::string_view inside,bool is_ro)1424 PolicyBuilder& PolicyBuilder::AddDirectoryAtIfNamespaced(
1425     absl::string_view outside, absl::string_view inside, bool is_ro) {
1426   auto valid_outside = ValidateAbsolutePath(outside);
1427   if (!valid_outside.ok()) {
1428     SetError(valid_outside.status());
1429     return *this;
1430   }
1431 
1432   if (absl::StartsWith(*valid_outside, "/proc/self")) {
1433     SetError(absl::InvalidArgumentError(
1434         absl::StrCat("Cannot add /proc/self mounts, you need to mount the "
1435                      "whole /proc instead. You tried to mount ",
1436                      outside)));
1437     return *this;
1438   }
1439 
1440   if (!is_ro && IsOnReadOnlyDev(*valid_outside)) {
1441     SetError(absl::FailedPreconditionError(
1442         absl::StrCat("Cannot add ", outside,
1443                      " as read-write as it's on a read-only device")));
1444     return *this;
1445   }
1446 
1447   if (absl::Status status =
1448           mounts_.AddDirectoryAt(*valid_outside, inside, is_ro);
1449       !status.ok()) {
1450     SetError(absl::InternalError(absl::StrCat("Could not add directory ",
1451                                               outside, " => ", inside, ": ",
1452                                               status.message())));
1453     return *this;
1454   }
1455   return *this;
1456 }
1457 
AddTmpfs(absl::string_view inside,size_t size)1458 PolicyBuilder& PolicyBuilder::AddTmpfs(absl::string_view inside, size_t size) {
1459   EnableNamespaces();  // NOLINT(clang-diagnostic-deprecated-declarations)
1460 
1461   if (auto status = mounts_.AddTmpfs(inside, size); !status.ok()) {
1462     SetError(absl::InternalError(absl::StrCat("Could not mount tmpfs ", inside,
1463                                               ": ", status.message())));
1464   }
1465   return *this;
1466 }
1467 
1468 // Use Allow(UnrestrictedNetworking()) instead.
AllowUnrestrictedNetworking()1469 PolicyBuilder& PolicyBuilder::AllowUnrestrictedNetworking() {
1470   return Allow(UnrestrictedNetworking());
1471 }
1472 
SetHostname(absl::string_view hostname)1473 PolicyBuilder& PolicyBuilder::SetHostname(absl::string_view hostname) {
1474   EnableNamespaces();  // NOLINT(clang-diagnostic-deprecated-declarations)
1475   hostname_ = std::string(hostname);
1476 
1477   return *this;
1478 }
1479 
CollectStacktracesOnViolation(bool enable)1480 PolicyBuilder& PolicyBuilder::CollectStacktracesOnViolation(bool enable) {
1481   collect_stacktrace_on_violation_ = enable;
1482   return *this;
1483 }
1484 
CollectStacktracesOnSignal(bool enable)1485 PolicyBuilder& PolicyBuilder::CollectStacktracesOnSignal(bool enable) {
1486   collect_stacktrace_on_signal_ = enable;
1487   return *this;
1488 }
1489 
CollectStacktracesOnTimeout(bool enable)1490 PolicyBuilder& PolicyBuilder::CollectStacktracesOnTimeout(bool enable) {
1491   collect_stacktrace_on_timeout_ = enable;
1492   return *this;
1493 }
1494 
CollectStacktracesOnKill(bool enable)1495 PolicyBuilder& PolicyBuilder::CollectStacktracesOnKill(bool enable) {
1496   collect_stacktrace_on_kill_ = enable;
1497   return *this;
1498 }
1499 
CollectStacktracesOnExit(bool enable)1500 PolicyBuilder& PolicyBuilder::CollectStacktracesOnExit(bool enable) {
1501   collect_stacktrace_on_exit_ = enable;
1502   return *this;
1503 }
1504 
AddNetworkProxyPolicy()1505 PolicyBuilder& PolicyBuilder::AddNetworkProxyPolicy() {
1506   if (allowed_hosts_) {
1507     SetError(absl::FailedPreconditionError(
1508         "AddNetworkProxyPolicy or AddNetworkProxyHandlerPolicy can be called "
1509         "at most once"));
1510     return *this;
1511   }
1512 
1513   allowed_hosts_ = AllowedHosts();
1514 
1515   AllowFutexOp(FUTEX_WAKE);
1516   AllowFutexOp(FUTEX_WAIT);
1517   AllowFutexOp(FUTEX_WAIT_BITSET);
1518   AllowDup();
1519   AllowSyscalls({
1520       __NR_recvmsg,
1521       __NR_close,
1522       __NR_gettid,
1523   });
1524   AddPolicyOnSyscall(__NR_socket, {
1525                                       ARG_32(0),
1526                                       JEQ32(AF_INET, ALLOW),
1527                                       JEQ32(AF_INET6, ALLOW),
1528                                   });
1529   AddPolicyOnSyscall(__NR_getsockopt,
1530                      [](bpf_labels& labels) -> std::vector<sock_filter> {
1531                        return {
1532                            ARG_32(1),
1533                            JNE32(SOL_SOCKET, JUMP(&labels, getsockopt_end)),
1534                            ARG_32(2),
1535                            JEQ32(SO_TYPE, ALLOW),
1536                            LABEL(&labels, getsockopt_end),
1537                        };
1538                      });
1539 #ifdef SAPI_PPC64_LE
1540   AddPolicyOnSyscall(__NR_socketcall, {
1541                                           ARG_32(0),
1542                                           JEQ32(SYS_SOCKET, ALLOW),
1543                                           JEQ32(SYS_GETSOCKOPT, ALLOW),
1544                                           JEQ32(SYS_RECVMSG, ALLOW),
1545                                       });
1546 #endif
1547   return *this;
1548 }
1549 
AddNetworkProxyHandlerPolicy()1550 PolicyBuilder& PolicyBuilder::AddNetworkProxyHandlerPolicy() {
1551   AddNetworkProxyPolicy();
1552   AllowSyscall(__NR_rt_sigreturn);
1553 
1554   AddPolicyOnSyscall(__NR_rt_sigaction, {
1555                                             ARG_32(0),
1556                                             JEQ32(SIGSYS, ALLOW),
1557                                         });
1558 
1559   AddPolicyOnSyscall(__NR_rt_sigprocmask, {
1560                                               ARG_32(0),
1561                                               JEQ32(SIG_UNBLOCK, ALLOW),
1562                                           });
1563 
1564   AddPolicyOnSyscall(__NR_connect, {TRAP(0)});
1565 #ifdef SAPI_PPC64_LE
1566   AddPolicyOnSyscall(__NR_socketcall, {
1567                                           ARG_32(0),
1568                                           JEQ32(SYS_CONNECT, TRAP(0)),
1569                                       });
1570 #endif
1571   return *this;
1572 }
1573 
TrapPtrace()1574 PolicyBuilder& PolicyBuilder::TrapPtrace() {
1575   if (handled_syscalls_.insert(__NR_ptrace).second) {
1576     user_policy_.insert(user_policy_.end(), {SYSCALL(__NR_ptrace, TRAP(0))});
1577     user_policy_handles_ptrace_ = true;
1578   }
1579   return *this;
1580 }
1581 
SetRootWritable()1582 PolicyBuilder& PolicyBuilder::SetRootWritable() {
1583   EnableNamespaces();  // NOLINT(clang-diagnostic-deprecated-declarations)
1584   mounts_.SetRootWritable();
1585 
1586   return *this;
1587 }
1588 
StoreDescription(PolicyBuilderDescription * pb_description)1589 void PolicyBuilder::StoreDescription(PolicyBuilderDescription* pb_description) {
1590   for (const auto& handled_syscall : handled_syscalls_) {
1591     pb_description->add_handled_syscalls(handled_syscall);
1592   }
1593 }
1594 
AllowIPv4(const std::string & ip_and_mask,uint32_t port)1595 PolicyBuilder& PolicyBuilder::AllowIPv4(const std::string& ip_and_mask,
1596                                         uint32_t port) {
1597   if (!allowed_hosts_) {
1598     SetError(absl::FailedPreconditionError(
1599         "AddNetworkProxyPolicy or AddNetworkProxyHandlerPolicy must be called "
1600         "before adding IP rules"));
1601     return *this;
1602   }
1603 
1604   absl::Status status = allowed_hosts_->AllowIPv4(ip_and_mask, port);
1605   if (!status.ok()) {
1606     SetError(status);
1607   }
1608   return *this;
1609 }
1610 
AllowIPv6(const std::string & ip_and_mask,uint32_t port)1611 PolicyBuilder& PolicyBuilder::AllowIPv6(const std::string& ip_and_mask,
1612                                         uint32_t port) {
1613   if (!allowed_hosts_) {
1614     SetError(absl::FailedPreconditionError(
1615         "AddNetworkProxyPolicy or AddNetworkProxyHandlerPolicy must be called "
1616         "before adding IP rules"));
1617     return *this;
1618   }
1619 
1620   absl::Status status = allowed_hosts_->AllowIPv6(ip_and_mask, port);
1621   if (!status.ok()) {
1622     SetError(status);
1623   }
1624   return *this;
1625 }
1626 
SetError(const absl::Status & status)1627 PolicyBuilder& PolicyBuilder::SetError(const absl::Status& status) {
1628   LOG(ERROR) << status;
1629   last_status_ = status;
1630   return *this;
1631 }
1632 
1633 }  // namespace sandbox2
1634