1 // Copyright 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "sandboxed_api/sandbox2/policybuilder.h"
16
17 #include <fcntl.h> // For the fcntl flags
18 #include <linux/bpf_common.h>
19 #include <linux/filter.h>
20 #include <linux/futex.h>
21 #include <linux/random.h> // For GRND_NONBLOCK
22 #include <linux/seccomp.h>
23 #include <stddef.h>
24 #include <sys/ioctl.h>
25 #include <sys/mman.h> // For mmap arguments
26 #include <sys/prctl.h>
27 #include <sys/socket.h>
28 #include <sys/stat.h>
29 #include <sys/statvfs.h>
30 #include <syscall.h>
31 #include <unistd.h>
32
33 #include <array>
34 #include <cerrno>
35 #include <csignal>
36 #include <cstdint>
37 #include <cstdlib>
38 #include <deque>
39 #include <functional>
40 #include <iterator>
41 #include <limits>
42 #include <memory>
43 #include <optional>
44 #include <string>
45 #include <utility>
46 #include <vector>
47
48 #include "absl/container/flat_hash_set.h"
49 #include "absl/log/log.h"
50 #include "absl/memory/memory.h"
51 #include "absl/status/status.h"
52 #include "absl/status/statusor.h"
53 #include "absl/strings/match.h"
54 #include "absl/strings/str_cat.h"
55 #include "absl/strings/string_view.h"
56 #include "absl/types/span.h"
57 #include "sandboxed_api/config.h"
58 #include "sandboxed_api/sandbox2/allow_all_syscalls.h"
59 #include "sandboxed_api/sandbox2/allow_unrestricted_networking.h"
60 #include "sandboxed_api/sandbox2/namespace.h"
61 #include "sandboxed_api/sandbox2/policy.h"
62 #include "sandboxed_api/sandbox2/syscall.h"
63 #include "sandboxed_api/sandbox2/trace_all_syscalls.h"
64 #include "sandboxed_api/sandbox2/util/bpf_helper.h"
65 #include "sandboxed_api/sandbox2/violation.pb.h"
66 #include "sandboxed_api/util/path.h"
67
68 #if defined(SAPI_X86_64)
69 #include <asm/prctl.h>
70 #elif defined(SAPI_PPC64_LE)
71 #include <asm/termbits.h> // On PPC, TCGETS macro needs termios
72 #endif
73
// Fallback definitions for constants that the system headers used for this
// build may not provide. Each one is only defined when it is still missing.
#ifndef MAP_FIXED_NOREPLACE
#define MAP_FIXED_NOREPLACE 0x100000
#endif
#ifndef PR_SET_VMA
#define PR_SET_VMA 0x53564d41
#endif
#ifndef PR_SET_VMA_ANON_NAME
#define PR_SET_VMA_ANON_NAME 0
#endif
83
84 namespace sandbox2 {
85 namespace {
86
87 namespace file = ::sapi::file;
88
// Syscall numbers of the mmap() family that exist on the target architecture.
//
// The element count is deduced from the initializer. With a fixed size of two,
// an architecture that defines only one of the two numbers would get a
// value-initialized second element, i.e. syscall number 0, which is a real and
// unrelated syscall (for example read() on x86-64).
constexpr std::array kMmapSyscalls = {
#ifdef __NR_mmap2
    uint32_t{__NR_mmap2},
#endif
#ifdef __NR_mmap
    uint32_t{__NR_mmap},
#endif
};
97
// Reports whether every jump offset encoded in `filter` is at most `max_jmp`.
// Instructions outside the BPF_JMP class are always accepted.
bool CheckBpfBounds(const sock_filter& filter, size_t max_jmp) {
  if (BPF_CLASS(filter.code) != BPF_JMP) {
    return true;
  }
  // BPF_JA keeps its offset in `k`; every other jump uses `jt` and `jf`.
  const bool is_unconditional = BPF_OP(filter.code) == BPF_JA;
  return is_unconditional ? filter.k <= max_jmp
                          : (filter.jt <= max_jmp && filter.jf <= max_jmp);
}
107
IsOnReadOnlyDev(const std::string & path)108 bool IsOnReadOnlyDev(const std::string& path) {
109 struct statvfs vfs;
110 if (TEMP_FAILURE_RETRY(statvfs(path.c_str(), &vfs)) == -1) {
111 PLOG(ERROR) << "Could not statvfs: " << path.c_str();
112 return false;
113 }
114 return vfs.f_flag & ST_RDONLY;
115 }
116
117 } // namespace
118
Allow(UnrestrictedNetworking tag)119 PolicyBuilder& PolicyBuilder::Allow(UnrestrictedNetworking tag) {
120 EnableNamespaces(); // NOLINT(clang-diagnostic-deprecated-declarations)
121 allow_unrestricted_networking_ = true;
122 return *this;
123 }
124
AllowSyscall(uint32_t num)125 PolicyBuilder& PolicyBuilder::AllowSyscall(uint32_t num) {
126 if (handled_syscalls_.insert(num).second) {
127 user_policy_.insert(user_policy_.end(), {SYSCALL(num, ALLOW)});
128 }
129 return *this;
130 }
131
AllowSyscalls(absl::Span<const uint32_t> nums)132 PolicyBuilder& PolicyBuilder::AllowSyscalls(absl::Span<const uint32_t> nums) {
133 for (auto num : nums) {
134 AllowSyscall(num);
135 }
136 return *this;
137 }
138
BlockSyscallsWithErrno(absl::Span<const uint32_t> nums,int error)139 PolicyBuilder& PolicyBuilder::BlockSyscallsWithErrno(
140 absl::Span<const uint32_t> nums, int error) {
141 for (auto num : nums) {
142 BlockSyscallWithErrno(num, error);
143 }
144 return *this;
145 }
146
BlockSyscallWithErrno(uint32_t num,int error)147 PolicyBuilder& PolicyBuilder::BlockSyscallWithErrno(uint32_t num, int error) {
148 if (handled_syscalls_.insert(num).second) {
149 user_policy_.insert(user_policy_.end(), {SYSCALL(num, ERRNO(error))});
150 if (num == __NR_bpf) {
151 user_policy_handles_bpf_ = true;
152 }
153 if (num == __NR_ptrace) {
154 user_policy_handles_ptrace_ = true;
155 }
156 }
157 return *this;
158 }
159
OverridableBlockSyscallWithErrno(uint32_t num,int error)160 PolicyBuilder& PolicyBuilder::OverridableBlockSyscallWithErrno(uint32_t num,
161 int error) {
162 overridable_policy_.insert(overridable_policy_.end(),
163 {SYSCALL(num, ERRNO(error))});
164 return *this;
165 }
166
AllowEpollWait()167 PolicyBuilder& PolicyBuilder::AllowEpollWait() {
168 return AllowSyscalls({
169 #ifdef __NR_epoll_wait
170 __NR_epoll_wait,
171 #endif
172 #ifdef __NR_epoll_pwait
173 __NR_epoll_pwait,
174 #endif
175 #ifdef __NR_epoll_pwait2
176 __NR_epoll_pwait2,
177 #endif
178 });
179 }
180
AllowEpoll()181 PolicyBuilder& PolicyBuilder::AllowEpoll() {
182 AllowSyscalls({
183 #ifdef __NR_epoll_create
184 __NR_epoll_create,
185 #endif
186 #ifdef __NR_epoll_create1
187 __NR_epoll_create1,
188 #endif
189 #ifdef __NR_epoll_ctl
190 __NR_epoll_ctl,
191 #endif
192 });
193
194 return AllowEpollWait();
195 }
196
AllowInotifyInit()197 PolicyBuilder& PolicyBuilder::AllowInotifyInit() {
198 return AllowSyscalls({
199 #ifdef __NR_inotify_init
200 __NR_inotify_init,
201 #endif
202 #ifdef __NR_inotify_init1
203 __NR_inotify_init1,
204 #endif
205 });
206 }
207
AllowSelect()208 PolicyBuilder& PolicyBuilder::AllowSelect() {
209 return AllowSyscalls({
210 #ifdef __NR_select
211 __NR_select,
212 #endif
213 #ifdef __NR_pselect6
214 __NR_pselect6,
215 #endif
216 });
217 }
218
AllowExit()219 PolicyBuilder& PolicyBuilder::AllowExit() {
220 return AllowSyscalls({__NR_exit, __NR_exit_group});
221 }
222
AllowScudoMalloc()223 PolicyBuilder& PolicyBuilder::AllowScudoMalloc() {
224 AllowTime();
225 AllowSyscalls({__NR_munmap, __NR_nanosleep});
226 AllowFutexOp(FUTEX_WAKE);
227 AllowLimitedMadvise();
228 AllowGetRandom();
229 AllowGetPIDs();
230 AllowWipeOnFork();
231 #ifdef __NR_open
232 OverridableBlockSyscallWithErrno(__NR_open, ENOENT);
233 #endif
234 #ifdef __NR_openat
235 OverridableBlockSyscallWithErrno(__NR_openat, ENOENT);
236 #endif
237
238 return AddPolicyOnMmap([](bpf_labels& labels) -> std::vector<sock_filter> {
239 return {
240 ARG_32(2), // prot
241 JEQ32(PROT_NONE, JUMP(&labels, prot_none)),
242 JNE32(PROT_READ | PROT_WRITE, JUMP(&labels, mmap_end)),
243
244 // PROT_READ | PROT_WRITE
245 ARG_32(3), // flags
246 BPF_STMT(BPF_ALU | BPF_AND | BPF_K,
247 ~uint32_t{MAP_FIXED | MAP_NORESERVE}),
248 JEQ32(MAP_PRIVATE | MAP_ANONYMOUS, ALLOW),
249 JUMP(&labels, mmap_end),
250
251 // PROT_NONE
252 LABEL(&labels, prot_none),
253 ARG_32(3), // flags
254 JEQ32(MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, ALLOW),
255
256 LABEL(&labels, mmap_end),
257 };
258 });
259 }
260
AllowTcMalloc()261 PolicyBuilder& PolicyBuilder::AllowTcMalloc() {
262 AllowTime();
263 AllowRestartableSequences(kRequireFastFences);
264 AllowSyscalls(
265 {__NR_munmap, __NR_nanosleep, __NR_brk, __NR_mincore, __NR_membarrier});
266 AllowLimitedMadvise();
267 AllowPrctlSetVma();
268 AllowPoll();
269 AllowGetPIDs();
270
271 AddPolicyOnSyscall(__NR_mprotect, {
272 ARG_32(2),
273 JEQ32(PROT_READ | PROT_WRITE, ALLOW),
274 JEQ32(PROT_NONE, ALLOW),
275 });
276
277 return AddPolicyOnMmap([](bpf_labels& labels) -> std::vector<sock_filter> {
278 return {
279 ARG_32(2), // prot
280 JEQ32(PROT_NONE, JUMP(&labels, prot_none)),
281 JNE32(PROT_READ | PROT_WRITE, JUMP(&labels, mmap_end)),
282
283 // PROT_READ | PROT_WRITE
284 ARG_32(3), // flags
285 JNE32(MAP_ANONYMOUS | MAP_PRIVATE, JUMP(&labels, mmap_end)),
286 ALLOW,
287
288 // PROT_NONE
289 LABEL(&labels, prot_none),
290 ARG_32(3), // flags
291 JEQ32(MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, ALLOW),
292 JEQ32(MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED_NOREPLACE, ALLOW),
293 JEQ32(MAP_ANONYMOUS | MAP_PRIVATE, ALLOW),
294
295 LABEL(&labels, mmap_end),
296 };
297 });
298 }
299
AllowSystemMalloc()300 PolicyBuilder& PolicyBuilder::AllowSystemMalloc() {
301 AllowSyscalls({__NR_munmap, __NR_brk});
302 AllowFutexOp(FUTEX_WAKE);
303 AddPolicyOnSyscall(__NR_mremap, {
304 ARG_32(3),
305 JEQ32(MREMAP_MAYMOVE, ALLOW),
306 });
307 return AddPolicyOnMmap([](bpf_labels& labels) -> std::vector<sock_filter> {
308 return {
309 ARG_32(2), // prot
310 JEQ32(PROT_NONE, JUMP(&labels, prot_none)),
311 JNE32(PROT_READ | PROT_WRITE, JUMP(&labels, mmap_end)),
312
313 // PROT_READ | PROT_WRITE
314 ARG_32(3), // flags
315 JEQ32(MAP_ANONYMOUS | MAP_PRIVATE, ALLOW),
316
317 // PROT_NONE
318 LABEL(&labels, prot_none),
319 ARG_32(3), // flags
320 JEQ32(MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, ALLOW),
321
322 LABEL(&labels, mmap_end),
323 };
324 });
325
326 return *this;
327 }
328
AllowLlvmSanitizers()329 PolicyBuilder& PolicyBuilder::AllowLlvmSanitizers() {
330 if constexpr (!sapi::sanitizers::IsAny()) {
331 return *this;
332 }
333 // *san use a custom allocator that runs mmap/unmap under the hood. For
334 // example:
335 // https://github.com/llvm/llvm-project/blob/596d534ac3524052df210be8d3c01a33b2260a42/compiler-rt/lib/asan/asan_allocator.cpp#L980
336 // https://github.com/llvm/llvm-project/blob/62ec4ac90738a5f2d209ed28c822223e58aaaeb7/compiler-rt/lib/sanitizer_common/sanitizer_allocator_secondary.h#L98
337 AllowMmapWithoutExec();
338 AllowSyscall(__NR_munmap);
339 AllowSyscall(__NR_sched_yield);
340
341 // https://github.com/llvm/llvm-project/blob/4bbc3290a25c0dc26007912a96e0f77b2092ee56/compiler-rt/lib/sanitizer_common/sanitizer_stack_store.cpp#L293
342 AddPolicyOnSyscall(__NR_mprotect,
343 {
344 ARG_32(2),
345 BPF_STMT(BPF_AND | BPF_ALU | BPF_K,
346 ~uint32_t{PROT_READ | PROT_WRITE}),
347 JEQ32(PROT_NONE, ALLOW),
348 });
349
350 AddPolicyOnSyscall(__NR_madvise, {
351 ARG_32(2),
352 JEQ32(MADV_DONTDUMP, ALLOW),
353 JEQ32(MADV_NOHUGEPAGE, ALLOW),
354 });
355 // Sanitizers read from /proc. For example:
356 // https://github.com/llvm/llvm-project/blob/634da7a1c61ee8c173e90a841eb1f4ea03caa20b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp#L1155
357 AddDirectoryIfNamespaced("/proc");
358 AllowOpen();
359 // Sanitizers need pid for reports. For example:
360 // https://github.com/llvm/llvm-project/blob/634da7a1c61ee8c173e90a841eb1f4ea03caa20b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp#L740
361 AllowGetPIDs();
362 // Sanitizers may try color output. For example:
363 // https://github.com/llvm/llvm-project/blob/87dd3d350c4ce0115b2cdf91d85ddd05ae2661aa/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp#L157
364 OverridableBlockSyscallWithErrno(__NR_ioctl, EPERM);
365 // https://github.com/llvm/llvm-project/blob/9aa39481d9eb718e872993791547053a3c1f16d5/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp#L150
366 // https://sourceware.org/git/?p=glibc.git;a=blob;f=nptl/pthread_getattr_np.c;h=de7edfa0928224eb8375e2fe894d6677570fbb3b;hb=HEAD#l188
367 AllowSyscall(__NR_sched_getaffinity);
368 // https://github.com/llvm/llvm-project/blob/02c2b472b510ff55679844c087b66e7837e13dc2/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp#L434
369 #ifdef __NR_readlink
370 OverridableBlockSyscallWithErrno(__NR_readlink, ENOENT);
371 #endif
372 OverridableBlockSyscallWithErrno(__NR_readlinkat, ENOENT);
373 if constexpr (sapi::sanitizers::IsASan()) {
374 AllowSyscall(__NR_sigaltstack);
375 }
376 if constexpr (sapi::sanitizers::IsTSan()) {
377 AllowSyscall(__NR_set_robust_list);
378 }
379 return *this;
380 }
381
AllowLlvmCoverage()382 PolicyBuilder& PolicyBuilder::AllowLlvmCoverage() {
383 if (!sapi::IsCoverageRun()) {
384 return *this;
385 }
386 AllowStat();
387 AllowGetPIDs();
388 AllowOpen();
389 AllowRead();
390 AllowWrite();
391 AllowMkdir();
392 AllowSafeFcntl();
393 AllowSyscalls({
394 __NR_munmap, __NR_close, __NR_lseek,
395 #ifdef __NR__llseek
396 __NR__llseek, // Newer glibc on PPC
397 #endif
398 });
399 AllowTcMalloc();
400 AddPolicyOnMmap([](bpf_labels& labels) -> std::vector<sock_filter> {
401 return {
402 ARG_32(2), // prot
403 JNE32(PROT_READ | PROT_WRITE, JUMP(&labels, mmap_end)),
404 ARG_32(3), // flags
405 JEQ32(MAP_SHARED, ALLOW),
406 LABEL(&labels, mmap_end),
407 };
408 });
409 AddDirectoryIfNamespaced(getenv("COVERAGE_DIR"), /*is_ro=*/false);
410 return *this;
411 }
412
AllowLimitedMadvise()413 PolicyBuilder& PolicyBuilder::AllowLimitedMadvise() {
414 return AddPolicyOnSyscall(__NR_madvise, {
415 ARG_32(2),
416 JEQ32(MADV_DONTNEED, ALLOW),
417 JEQ32(MADV_REMOVE, ALLOW),
418 JEQ32(MADV_HUGEPAGE, ALLOW),
419 JEQ32(MADV_NOHUGEPAGE, ALLOW),
420 });
421 }
422
AllowMmapWithoutExec()423 PolicyBuilder& PolicyBuilder::AllowMmapWithoutExec() {
424 return AddPolicyOnMmap({
425 ARG_32(2),
426 BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, PROT_EXEC, 1, 0),
427 ALLOW,
428 });
429 }
430
AllowMmap()431 PolicyBuilder& PolicyBuilder::AllowMmap() {
432 return AllowSyscalls(kMmapSyscalls);
433 }
434
AllowOpen()435 PolicyBuilder& PolicyBuilder::AllowOpen() {
436 #ifdef __NR_creat
437 AllowSyscall(__NR_creat);
438 #endif
439 #ifdef __NR_open
440 AllowSyscall(__NR_open);
441 #endif
442 #ifdef __NR_openat
443 AllowSyscall(__NR_openat);
444 #endif
445 return *this;
446 }
447
AllowStat()448 PolicyBuilder& PolicyBuilder::AllowStat() {
449 #ifdef __NR_fstat
450 AllowSyscall(__NR_fstat);
451 #endif
452 #ifdef __NR_fstat64
453 AllowSyscall(__NR_fstat64);
454 #endif
455 #ifdef __NR_fstatat
456 AllowSyscall(__NR_fstatat);
457 #endif
458 #ifdef __NR_fstatat64
459 AllowSyscall(__NR_fstatat64);
460 #endif
461 #ifdef __NR_fstatfs
462 AllowSyscall(__NR_fstatfs);
463 #endif
464 #ifdef __NR_fstatfs64
465 AllowSyscall(__NR_fstatfs64);
466 #endif
467 #ifdef __NR_lstat
468 AllowSyscall(__NR_lstat);
469 #endif
470 #ifdef __NR_lstat64
471 AllowSyscall(__NR_lstat64);
472 #endif
473 #ifdef __NR_newfstatat
474 AllowSyscall(__NR_newfstatat);
475 #endif
476 #ifdef __NR_oldfstat
477 AllowSyscall(__NR_oldfstat);
478 #endif
479 #ifdef __NR_oldlstat
480 AllowSyscall(__NR_oldlstat);
481 #endif
482 #ifdef __NR_oldstat
483 AllowSyscall(__NR_oldstat);
484 #endif
485 #ifdef __NR_stat
486 AllowSyscall(__NR_stat);
487 #endif
488 #ifdef __NR_stat64
489 AllowSyscall(__NR_stat64);
490 #endif
491 #ifdef __NR_statfs
492 AllowSyscall(__NR_statfs);
493 #endif
494 #ifdef __NR_statfs64
495 AllowSyscall(__NR_statfs64);
496 #endif
497 return *this;
498 }
499
AllowAccess()500 PolicyBuilder& PolicyBuilder::AllowAccess() {
501 #ifdef __NR_access
502 AllowSyscall(__NR_access);
503 #endif
504 #ifdef __NR_faccessat
505 AllowSyscall(__NR_faccessat);
506 #endif
507 #ifdef __NR_faccessat2
508 AllowSyscall(__NR_faccessat2);
509 #endif
510 return *this;
511 }
512
AllowDup()513 PolicyBuilder& PolicyBuilder::AllowDup() {
514 AllowSyscall(__NR_dup);
515 #ifdef __NR_dup2
516 AllowSyscall(__NR_dup2);
517 #endif
518 AllowSyscall(__NR_dup3);
519 return *this;
520 }
521
AllowPipe()522 PolicyBuilder& PolicyBuilder::AllowPipe() {
523 #ifdef __NR_pipe
524 AllowSyscall(__NR_pipe);
525 #endif
526 AllowSyscall(__NR_pipe2);
527 return *this;
528 }
529
AllowChmod()530 PolicyBuilder& PolicyBuilder::AllowChmod() {
531 #ifdef __NR_chmod
532 AllowSyscall(__NR_chmod);
533 #endif
534 AllowSyscall(__NR_fchmod);
535 AllowSyscall(__NR_fchmodat);
536 return *this;
537 }
538
AllowChown()539 PolicyBuilder& PolicyBuilder::AllowChown() {
540 #ifdef __NR_chown
541 AllowSyscall(__NR_chown);
542 #endif
543 #ifdef __NR_lchown
544 AllowSyscall(__NR_lchown);
545 #endif
546 AllowSyscall(__NR_fchown);
547 AllowSyscall(__NR_fchownat);
548 return *this;
549 }
550
AllowRead()551 PolicyBuilder& PolicyBuilder::AllowRead() {
552 return AllowSyscalls({
553 __NR_read,
554 __NR_readv,
555 __NR_preadv,
556 __NR_pread64,
557 });
558 }
559
AllowWrite()560 PolicyBuilder& PolicyBuilder::AllowWrite() {
561 return AllowSyscalls({
562 __NR_write,
563 __NR_writev,
564 __NR_pwritev,
565 __NR_pwrite64,
566 });
567 }
568
AllowReaddir()569 PolicyBuilder& PolicyBuilder::AllowReaddir() {
570 return AllowSyscalls({
571 #ifdef __NR_getdents
572 __NR_getdents,
573 #endif
574 #ifdef __NR_getdents64
575 __NR_getdents64,
576 #endif
577 });
578 }
579
AllowReadlink()580 PolicyBuilder& PolicyBuilder::AllowReadlink() {
581 return AllowSyscalls({
582 #ifdef __NR_readlink
583 __NR_readlink,
584 #endif
585 #ifdef __NR_readlinkat
586 __NR_readlinkat,
587 #endif
588 });
589 }
590
AllowLink()591 PolicyBuilder& PolicyBuilder::AllowLink() {
592 return AllowSyscalls({
593 #ifdef __NR_link
594 __NR_link,
595 #endif
596 #ifdef __NR_linkat
597 __NR_linkat,
598 #endif
599 });
600 }
601
AllowSymlink()602 PolicyBuilder& PolicyBuilder::AllowSymlink() {
603 return AllowSyscalls({
604 #ifdef __NR_symlink
605 __NR_symlink,
606 #endif
607 #ifdef __NR_symlinkat
608 __NR_symlinkat,
609 #endif
610 });
611 }
612
AllowMkdir()613 PolicyBuilder& PolicyBuilder::AllowMkdir() {
614 return AllowSyscalls({
615 #ifdef __NR_mkdir
616 __NR_mkdir,
617 #endif
618 #ifdef __NR_mkdirat
619 __NR_mkdirat,
620 #endif
621 });
622 }
623
AllowUtime()624 PolicyBuilder& PolicyBuilder::AllowUtime() {
625 return AllowSyscalls({
626 #ifdef __NR_futimens
627 __NR_futimens,
628 #endif
629 #ifdef __NR_utime
630 __NR_utime,
631 #endif
632 #ifdef __NR_utimes
633 __NR_utimes,
634 #endif
635 #ifdef __NR_utimensat
636 __NR_utimensat,
637 #endif
638 });
639 }
640
AllowSafeFcntl()641 PolicyBuilder& PolicyBuilder::AllowSafeFcntl() {
642 return AddPolicyOnSyscalls({__NR_fcntl,
643 #ifdef __NR_fcntl64
644 __NR_fcntl64
645 #endif
646 },
647 {
648 ARG_32(1),
649 JEQ32(F_GETFD, ALLOW),
650 JEQ32(F_SETFD, ALLOW),
651 JEQ32(F_GETFL, ALLOW),
652 JEQ32(F_SETFL, ALLOW),
653 JEQ32(F_GETLK, ALLOW),
654 JEQ32(F_SETLK, ALLOW),
655 JEQ32(F_SETLKW, ALLOW),
656 JEQ32(F_DUPFD, ALLOW),
657 JEQ32(F_DUPFD_CLOEXEC, ALLOW),
658 });
659 }
660
AllowFork()661 PolicyBuilder& PolicyBuilder::AllowFork() {
662 return AllowSyscalls({
663 #ifdef __NR_fork
664 __NR_fork,
665 #endif
666 #ifdef __NR_vfork
667 __NR_vfork,
668 #endif
669 __NR_clone});
670 }
671
AllowWait()672 PolicyBuilder& PolicyBuilder::AllowWait() {
673 return AllowSyscalls({
674 #ifdef __NR_waitpid
675 __NR_waitpid,
676 #endif
677 __NR_wait4});
678 }
679
AllowAlarm()680 PolicyBuilder& PolicyBuilder::AllowAlarm() {
681 return AllowSyscalls({
682 #ifdef __NR_alarm
683 __NR_alarm,
684 #endif
685 __NR_setitimer});
686 }
687
AllowHandleSignals()688 PolicyBuilder& PolicyBuilder::AllowHandleSignals() {
689 return AllowSyscalls({
690 __NR_rt_sigaction,
691 __NR_rt_sigreturn,
692 __NR_rt_sigprocmask,
693 #ifdef __NR_signal
694 __NR_signal,
695 #endif
696 #ifdef __NR_sigaction
697 __NR_sigaction,
698 #endif
699 #ifdef __NR_sigreturn
700 __NR_sigreturn,
701 #endif
702 #ifdef __NR_sigprocmask
703 __NR_sigprocmask,
704 #endif
705 #ifdef __NR_sigaltstack
706 __NR_sigaltstack,
707 #endif
708 });
709 }
710
AllowTCGETS()711 PolicyBuilder& PolicyBuilder::AllowTCGETS() {
712 return AddPolicyOnSyscall(__NR_ioctl, {
713 ARG_32(1),
714 JEQ32(TCGETS, ALLOW),
715 });
716 }
717
AllowTime()718 PolicyBuilder& PolicyBuilder::AllowTime() {
719 return AllowSyscalls({
720 #ifdef __NR_time
721 __NR_time,
722 #endif
723 __NR_gettimeofday, __NR_clock_gettime});
724 }
725
AllowSleep()726 PolicyBuilder& PolicyBuilder::AllowSleep() {
727 return AllowSyscalls({
728 __NR_clock_nanosleep,
729 __NR_nanosleep,
730 });
731 }
732
AllowGetIDs()733 PolicyBuilder& PolicyBuilder::AllowGetIDs() {
734 return AllowSyscalls({
735 __NR_getuid,
736 __NR_geteuid,
737 __NR_getresuid,
738 __NR_getgid,
739 __NR_getegid,
740 __NR_getresgid,
741 #ifdef __NR_getuid32
742 __NR_getuid32,
743 __NR_geteuid32,
744 __NR_getresuid32,
745 __NR_getgid32,
746 __NR_getegid32,
747 __NR_getresgid32,
748 #endif
749 __NR_getgroups,
750 });
751 }
752
AllowRestartableSequences(CpuFenceMode cpu_fence_mode)753 PolicyBuilder& PolicyBuilder::AllowRestartableSequences(
754 CpuFenceMode cpu_fence_mode) {
755 #ifdef __NR_rseq
756 AllowSyscall(__NR_rseq);
757 #endif
758 AddPolicyOnMmap([](bpf_labels& labels) -> std::vector<sock_filter> {
759 return {
760 ARG_32(2), // prot
761 JNE32(PROT_READ | PROT_WRITE, JUMP(&labels, mmap_end)),
762
763 ARG_32(3), // flags
764 JNE32(MAP_PRIVATE | MAP_ANONYMOUS, JUMP(&labels, mmap_end)),
765
766 ALLOW,
767 LABEL(&labels, mmap_end),
768 };
769 });
770 AllowSyscall(__NR_getcpu);
771 AllowSyscall(__NR_membarrier);
772 AllowFutexOp(FUTEX_WAIT);
773 AllowFutexOp(FUTEX_WAKE);
774 AllowRead();
775 AllowOpen();
776 AllowPoll();
777 AllowSyscall(__NR_close);
778 AddPolicyOnSyscall(__NR_rt_sigprocmask, {
779 ARG_32(0),
780 JEQ32(SIG_SETMASK, ALLOW),
781 });
782 AllowPrctlSetVma();
783 if (cpu_fence_mode == kAllowSlowFences) {
784 AllowSyscall(__NR_sched_getaffinity);
785 AllowSyscall(__NR_sched_setaffinity);
786 }
787 AddFileIfNamespaced("/proc/cpuinfo");
788 AddFileIfNamespaced("/proc/stat");
789 AddDirectoryIfNamespaced("/sys/devices/system/cpu");
790 if (cpu_fence_mode == kAllowSlowFences) {
791 AddFileIfNamespaced("/proc/self/cpuset");
792 }
793 return *this;
794 }
795
AllowGetPIDs()796 PolicyBuilder& PolicyBuilder::AllowGetPIDs() {
797 return AllowSyscalls({
798 __NR_getpid,
799 __NR_getppid,
800 __NR_gettid,
801 });
802 }
803
AllowGetPGIDs()804 PolicyBuilder& PolicyBuilder::AllowGetPGIDs() {
805 return AllowSyscalls({
806 __NR_getpgid,
807 #ifdef __NR_getpgrp
808 __NR_getpgrp,
809 #endif
810 });
811 }
812
AllowGetRlimit()813 PolicyBuilder& PolicyBuilder::AllowGetRlimit() {
814 #ifdef __NR_prlimit64
815 AddPolicyOnSyscall(__NR_prlimit64, {ARG(2), JEQ64(0, 0, ALLOW)});
816 #endif
817 return AllowSyscalls({
818 #ifdef __NR_getrlimit
819 __NR_getrlimit,
820 #endif
821 #ifdef __NR_ugetrlimit
822 __NR_ugetrlimit,
823 #endif
824 });
825 }
826
AllowSetRlimit()827 PolicyBuilder& PolicyBuilder::AllowSetRlimit() {
828 return AllowSyscalls({
829 #ifdef __NR_prlimit64
830 __NR_prlimit64,
831 #endif
832 #ifdef __NR_setrlimit
833 __NR_setrlimit,
834 #endif
835 #ifdef __NR_usetrlimit
836 __NR_usetrlimit,
837 #endif
838 });
839 }
840
AllowGetRandom()841 PolicyBuilder& PolicyBuilder::AllowGetRandom() {
842 return AddPolicyOnSyscall(__NR_getrandom, {
843 ARG_32(2),
844 JEQ32(0, ALLOW),
845 JEQ32(GRND_NONBLOCK, ALLOW),
846 });
847 }
848
AllowWipeOnFork()849 PolicyBuilder& PolicyBuilder::AllowWipeOnFork() {
850 // System headers may not be recent enough to include MADV_WIPEONFORK.
851 static constexpr uint32_t kMadv_WipeOnFork = 18;
852 // The -1 value is used by code to probe that the kernel returns -EINVAL for
853 // unknown values because some environments, like qemu, ignore madvise
854 // completely, but code needs to know whether WIPEONFORK took effect.
855 return AddPolicyOnSyscall(__NR_madvise,
856 {
857 ARG_32(2),
858 JEQ32(kMadv_WipeOnFork, ALLOW),
859 JEQ32(static_cast<uint32_t>(-1), ALLOW),
860 });
861 }
862
AllowLogForwarding()863 PolicyBuilder& PolicyBuilder::AllowLogForwarding() {
864 AllowWrite();
865 AllowSystemMalloc();
866 AllowTcMalloc();
867
868 // From comms
869 AllowGetPIDs();
870 AllowSyscalls({// from logging code
871 __NR_clock_gettime,
872 // From comms
873 __NR_gettid, __NR_close});
874
875 // For generating stacktraces in logging (e.g. `LOG(FATAL)`)
876 AddPolicyOnSyscall(__NR_rt_sigprocmask, {
877 ARG_32(0),
878 JEQ32(SIG_BLOCK, ALLOW),
879 });
880 AllowGetRlimit();
881
882 // For LOG(FATAL)
883 return AddPolicyOnSyscall(__NR_kill,
884 [](bpf_labels& labels) -> std::vector<sock_filter> {
885 return {
886 ARG_32(0),
887 JNE32(0, JUMP(&labels, pid_not_null)),
888 ARG_32(1),
889 JEQ32(SIGABRT, ALLOW),
890 LABEL(&labels, pid_not_null),
891 };
892 });
893 }
894
AllowUnlink()895 PolicyBuilder& PolicyBuilder::AllowUnlink() {
896 AllowSyscalls({
897 #ifdef __NR_rmdir
898 __NR_rmdir,
899 #endif
900 #ifdef __NR_unlink
901 __NR_unlink,
902 #endif
903 __NR_unlinkat,
904 });
905 return *this;
906 }
907
AllowPoll()908 PolicyBuilder& PolicyBuilder::AllowPoll() {
909 AllowSyscalls({
910 #ifdef __NR_poll
911 __NR_poll,
912 #endif
913 __NR_ppoll,
914 });
915 return *this;
916 }
917
AllowRename()918 PolicyBuilder& PolicyBuilder::AllowRename() {
919 AllowSyscalls({
920 #ifdef __NR_rename
921 __NR_rename,
922 #endif
923 __NR_renameat,
924 #ifdef __NR_renameat2
925 __NR_renameat2,
926 #endif
927 });
928 return *this;
929 }
930
AllowEventFd()931 PolicyBuilder& PolicyBuilder::AllowEventFd() {
932 AllowSyscalls({
933 #ifdef __NR_eventfd
934 __NR_eventfd,
935 #endif
936 __NR_eventfd2,
937 });
938 return *this;
939 }
940
AllowPrctlSetName()941 PolicyBuilder& PolicyBuilder::AllowPrctlSetName() {
942 AddPolicyOnSyscall(__NR_prctl, {ARG_32(0), JEQ32(PR_SET_NAME, ALLOW)});
943 return *this;
944 }
945
AllowPrctlSetVma()946 PolicyBuilder& PolicyBuilder::AllowPrctlSetVma() {
947 AddPolicyOnSyscall(__NR_prctl,
948 [](bpf_labels& labels) -> std::vector<sock_filter> {
949 return {
950 ARG_32(0),
951 JNE32(PR_SET_VMA, JUMP(&labels, prctlsetvma_end)),
952 ARG_32(1),
953 JEQ32(PR_SET_VMA_ANON_NAME, ALLOW),
954 LABEL(&labels, prctlsetvma_end),
955 };
956 });
957 return *this;
958 }
959
AllowFutexOp(int op)960 PolicyBuilder& PolicyBuilder::AllowFutexOp(int op) {
961 return AddPolicyOnSyscall(
962 __NR_futex, {
963 ARG_32(1),
964 // a <- a & FUTEX_CMD_MASK
965 BPF_STMT(BPF_ALU + BPF_AND + BPF_K,
966 static_cast<uint32_t>(FUTEX_CMD_MASK)),
967 JEQ32(static_cast<uint32_t>(op) & FUTEX_CMD_MASK, ALLOW),
968 });
969 }
970
AllowStaticStartup()971 PolicyBuilder& PolicyBuilder::AllowStaticStartup() {
972 AllowGetRlimit();
973 AllowSyscalls({
974 // These syscalls take a pointer, so no restriction.
975 __NR_uname, __NR_brk, __NR_set_tid_address,
976
977 #if defined(__ARM_NR_set_tls)
978 // libc sets the TLS during startup
979 __ARM_NR_set_tls,
980 #endif
981
982 // This syscall takes a pointer and a length.
983 // We could restrict length, but it might change, so not worth it.
984 __NR_set_robust_list,
985 });
986
987 AllowFutexOp(FUTEX_WAIT_BITSET);
988
989 AddPolicyOnSyscall(__NR_rt_sigaction,
990 {
991 ARG_32(0),
992 // This is real-time signals used internally by libc.
993 JEQ32(__SIGRTMIN + 0, ALLOW),
994 JEQ32(__SIGRTMIN + 1, ALLOW),
995 });
996
997 AllowSyscall(__NR_rt_sigprocmask);
998
999 #ifdef SAPI_X86_64
1000 // The second argument is a pointer.
1001 AddPolicyOnSyscall(__NR_arch_prctl, {
1002 ARG_32(0),
1003 JEQ32(ARCH_SET_FS, ALLOW),
1004 });
1005 #endif
1006
1007 if constexpr (sapi::host_cpu::IsArm64()) {
1008 OverridableBlockSyscallWithErrno(__NR_readlinkat, ENOENT);
1009 }
1010 #ifdef __NR_readlink
1011 OverridableBlockSyscallWithErrno(__NR_readlink, ENOENT);
1012 #endif
1013
1014 AllowGetRlimit();
1015 AddPolicyOnSyscall(__NR_mprotect, {
1016 ARG_32(2),
1017 JEQ32(PROT_READ, ALLOW),
1018 });
1019
1020 OverridableBlockSyscallWithErrno(__NR_sigaltstack, ENOSYS);
1021
1022 return *this;
1023 }
1024
AllowDynamicStartup()1025 PolicyBuilder& PolicyBuilder::AllowDynamicStartup() {
1026 #ifdef __ANDROID__
1027 AllowSafeFcntl();
1028 AllowGetIDs();
1029 AllowGetPIDs();
1030 AllowGetRandom();
1031 AllowSyscalls({
1032 #ifdef __NR_fstatfs
1033 __NR_fstatfs,
1034 #endif
1035 #ifdef __NR_fstatfs64
1036 __NR_fstatfs64,
1037 #endif
1038 __NR_readlinkat,
1039 __NR_sched_getaffinity,
1040 __NR_sched_getscheduler,
1041 });
1042 AllowHandleSignals();
1043 AllowFutexOp(FUTEX_WAKE_PRIVATE);
1044 AddPolicyOnSyscall(__NR_prctl,
1045 [](bpf_labels& labels) -> std::vector<sock_filter> {
1046 return {
1047 ARG_32(0), // option
1048 JEQ32(PR_GET_DUMPABLE, ALLOW),
1049 JNE32(PR_SET_VMA, JUMP(&labels, prctl_end)),
1050
1051 ARG_32(1), // arg2
1052 JEQ32(PR_SET_VMA_ANON_NAME, ALLOW),
1053
1054 LABEL(&labels, prctl_end),
1055 };
1056 });
1057 AddPolicyOnSyscall(__NR_mremap,
1058 {
1059 ARG_32(3),
1060 JEQ32(MREMAP_MAYMOVE | MREMAP_FIXED, ALLOW),
1061 });
1062 AddPolicyOnMmap([](bpf_labels& labels) -> std::vector<sock_filter> {
1063 return {
1064 ARG_32(2), // prot
1065 JEQ32(PROT_NONE, JUMP(&labels, prot_none)),
1066 JEQ32(PROT_READ, JUMP(&labels, prot_read)),
1067 JEQ32(PROT_READ | PROT_WRITE, JUMP(&labels, prot_RW_or_RX)),
1068 JEQ32(PROT_READ | PROT_EXEC, JUMP(&labels, prot_RW_or_RX)),
1069
1070 // PROT_NONE
1071 LABEL(&labels, prot_none),
1072 ARG_32(3), // flags
1073 JEQ32(MAP_PRIVATE | MAP_ANONYMOUS, ALLOW),
1074 JEQ32(MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, ALLOW),
1075 JUMP(&labels, mmap_end),
1076
1077 // PROT_READ
1078 LABEL(&labels, prot_read),
1079 ARG_32(3), // flags
1080 JEQ32(MAP_SHARED, ALLOW),
1081 JEQ32(MAP_PRIVATE, ALLOW),
1082 JEQ32(MAP_PRIVATE | MAP_FIXED, ALLOW),
1083 JUMP(&labels, mmap_end),
1084
1085 // PROT_READ | PROT_WRITE
1086 // PROT_READ | PROT_EXEC
1087 LABEL(&labels, prot_RW_or_RX),
1088 ARG_32(3), // flags
1089 JEQ32(MAP_PRIVATE | MAP_FIXED, ALLOW),
1090
1091 LABEL(&labels, mmap_end),
1092 };
1093 });
1094 #endif
1095
1096 AllowAccess();
1097 AllowOpen();
1098 AllowRead();
1099 AllowStat();
1100 AllowSyscalls({__NR_lseek,
1101 #ifdef __NR__llseek
1102 __NR__llseek, // Newer glibc on PPC
1103 #endif
1104 __NR_close, __NR_munmap});
1105 AddPolicyOnSyscall(__NR_mprotect, {
1106 ARG_32(2),
1107 JEQ32(PROT_READ, ALLOW),
1108 JEQ32(PROT_NONE, ALLOW),
1109 JEQ32(PROT_READ | PROT_WRITE, ALLOW),
1110 JEQ32(PROT_READ | PROT_EXEC, ALLOW),
1111 });
1112 AllowStaticStartup();
1113
1114 return AddPolicyOnMmap([](bpf_labels& labels) -> std::vector<sock_filter> {
1115 return {
1116 ARG_32(2), // prot
1117 JEQ32(PROT_READ | PROT_EXEC, JUMP(&labels, prot_exec)),
1118 JEQ32(PROT_READ | PROT_WRITE, JUMP(&labels, prot_read_write)),
1119 JNE32(PROT_READ, JUMP(&labels, mmap_end)),
1120
1121 // PROT_READ
1122 ARG_32(3), // flags
1123 JEQ32(MAP_PRIVATE, ALLOW),
1124 JUMP(&labels, mmap_end),
1125
1126 // PROT_READ | PROT_WRITE
1127 LABEL(&labels, prot_read_write),
1128 ARG_32(3), // flags
1129 JEQ32(MAP_FILE | MAP_PRIVATE | MAP_FIXED | MAP_DENYWRITE, ALLOW),
1130 JEQ32(MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, ALLOW),
1131 JEQ32(MAP_ANONYMOUS | MAP_PRIVATE, ALLOW),
1132 JUMP(&labels, mmap_end),
1133
1134 // PROT_READ | PROT_EXEC
1135 LABEL(&labels, prot_exec),
1136 ARG_32(3), // flags
1137 JEQ32(MAP_FILE | MAP_PRIVATE | MAP_DENYWRITE, ALLOW),
1138
1139 LABEL(&labels, mmap_end),
1140 };
1141 });
1142 }
1143
AddPolicyOnSyscall(uint32_t num,absl::Span<const sock_filter> policy)1144 PolicyBuilder& PolicyBuilder::AddPolicyOnSyscall(
1145 uint32_t num, absl::Span<const sock_filter> policy) {
1146 return AddPolicyOnSyscalls({num}, policy);
1147 }
1148
AddPolicyOnSyscall(uint32_t num,BpfFunc f)1149 PolicyBuilder& PolicyBuilder::AddPolicyOnSyscall(uint32_t num, BpfFunc f) {
1150 return AddPolicyOnSyscalls({num}, f);
1151 }
1152
AddPolicyOnSyscalls(absl::Span<const uint32_t> nums,absl::Span<const sock_filter> policy)1153 PolicyBuilder& PolicyBuilder::AddPolicyOnSyscalls(
1154 absl::Span<const uint32_t> nums, absl::Span<const sock_filter> policy) {
1155 if (nums.empty()) {
1156 SetError(absl::InvalidArgumentError(
1157 "Cannot add a policy for empty list of syscalls"));
1158 return *this;
1159 }
1160 std::deque<sock_filter> out;
1161 // Insert and verify the policy.
1162 out.insert(out.end(), policy.begin(), policy.end());
1163 for (size_t i = 0; i < out.size(); ++i) {
1164 sock_filter& filter = out[i];
1165 const size_t max_jump = out.size() - i - 1;
1166 if (!CheckBpfBounds(filter, max_jump)) {
1167 SetError(absl::InvalidArgumentError("bpf jump out of bounds"));
1168 return *this;
1169 }
1170 // Syscall arch is expected as TRACE value
1171 if (filter.code == (BPF_RET | BPF_K) &&
1172 (filter.k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRACE &&
1173 (filter.k & SECCOMP_RET_DATA) != Syscall::GetHostArch()) {
1174 LOG(WARNING) << "SANDBOX2_TRACE should be used in policy instead of "
1175 "TRACE(value)";
1176 filter = SANDBOX2_TRACE;
1177 }
1178 }
1179 // Pre-/Postcondition: Syscall number loaded into A register
1180 out.push_back(LOAD_SYSCALL_NR);
1181 if (out.size() > std::numeric_limits<uint32_t>::max()) {
1182 SetError(absl::InvalidArgumentError("syscall policy is too long"));
1183 return *this;
1184 }
1185 // Create jumps for each syscall.
1186 size_t do_policy_loc = out.size();
1187 // Iterate in reverse order and prepend instruction, so that jumps can be
1188 // calculated easily.
1189 constexpr size_t kMaxShortJump = 255;
1190 bool last = true;
1191 for (auto it = std::rbegin(nums); it != std::rend(nums); ++it) {
1192 if (*it == __NR_bpf || *it == __NR_ptrace) {
1193 SetError(absl::InvalidArgumentError(
1194 "cannot add policy for bpf/ptrace syscall"));
1195 return *this;
1196 }
1197 // If syscall is not matched try with the next one.
1198 uint8_t jf = 0;
1199 // If last syscall on the list does not match skip the policy by jumping
1200 // over it.
1201 if (last) {
1202 if (out.size() > kMaxShortJump) {
1203 out.push_front(
1204 BPF_STMT(BPF_JMP + BPF_JA, static_cast<uint32_t>(out.size())));
1205 } else {
1206 jf = out.size();
1207 }
1208 last = false;
1209 }
1210 // Add a helper absolute jump if needed - the policy/last helper jump is
1211 // out of reach of a short jump.
1212 if ((out.size() - do_policy_loc) > kMaxShortJump) {
1213 out.push_front(BPF_STMT(
1214 BPF_JMP + BPF_JA, static_cast<uint32_t>(out.size() - policy.size())));
1215 do_policy_loc = out.size();
1216 ++jf;
1217 }
1218 uint8_t jt = out.size() - do_policy_loc;
1219 out.push_front(BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, *it, jt, jf));
1220 }
1221 user_policy_.insert(user_policy_.end(), out.begin(), out.end());
1222 return *this;
1223 }
1224
AddPolicyOnSyscalls(absl::Span<const uint32_t> nums,BpfFunc f)1225 PolicyBuilder& PolicyBuilder::AddPolicyOnSyscalls(
1226 absl::Span<const uint32_t> nums, BpfFunc f) {
1227 return AddPolicyOnSyscalls(nums, ResolveBpfFunc(f));
1228 }
1229
AddPolicyOnMmap(absl::Span<const sock_filter> policy)1230 PolicyBuilder& PolicyBuilder::AddPolicyOnMmap(
1231 absl::Span<const sock_filter> policy) {
1232 return AddPolicyOnSyscalls(kMmapSyscalls, policy);
1233 }
1234
AddPolicyOnMmap(BpfFunc f)1235 PolicyBuilder& PolicyBuilder::AddPolicyOnMmap(BpfFunc f) {
1236 return AddPolicyOnSyscalls(kMmapSyscalls, f);
1237 }
1238
DangerDefaultAllowAll()1239 PolicyBuilder& PolicyBuilder::DangerDefaultAllowAll() {
1240 return DefaultAction(AllowAllSyscalls());
1241 }
1242
DefaultAction(AllowAllSyscalls)1243 PolicyBuilder& PolicyBuilder::DefaultAction(AllowAllSyscalls) {
1244 default_action_ = ALLOW;
1245 return *this;
1246 }
1247
DefaultAction(TraceAllSyscalls)1248 PolicyBuilder& PolicyBuilder::DefaultAction(TraceAllSyscalls) {
1249 default_action_ = SANDBOX2_TRACE;
1250 return *this;
1251 }
1252
ValidateAbsolutePath(absl::string_view path)1253 absl::StatusOr<std::string> PolicyBuilder::ValidateAbsolutePath(
1254 absl::string_view path) {
1255 if (!file::IsAbsolutePath(path)) {
1256 return absl::InvalidArgumentError(
1257 absl::StrCat("Path is not absolute: '", path, "'"));
1258 }
1259 return ValidatePath(path);
1260 }
1261
ValidatePath(absl::string_view path)1262 absl::StatusOr<std::string> PolicyBuilder::ValidatePath(
1263 absl::string_view path) {
1264 std::string fixed_path = file::CleanPath(path);
1265 if (fixed_path != path) {
1266 return absl::InvalidArgumentError(absl::StrCat(
1267 "Path was not normalized. '", path, "' != '", fixed_path, "'"));
1268 }
1269 return fixed_path;
1270 }
1271
ResolveBpfFunc(BpfFunc f)1272 std::vector<sock_filter> PolicyBuilder::ResolveBpfFunc(BpfFunc f) {
1273 bpf_labels l = {0};
1274
1275 std::vector<sock_filter> policy = f(l);
1276 if (bpf_resolve_jumps(&l, policy.data(), policy.size()) != 0) {
1277 SetError(absl::InternalError("Cannot resolve bpf jumps"));
1278 }
1279
1280 return policy;
1281 }
1282
TryBuild()1283 absl::StatusOr<std::unique_ptr<Policy>> PolicyBuilder::TryBuild() {
1284 if (!last_status_.ok()) {
1285 return last_status_;
1286 }
1287
1288 if (user_policy_.size() > kMaxUserPolicyLength) {
1289 return absl::FailedPreconditionError(
1290 absl::StrCat("User syscall policy is to long (", user_policy_.size(),
1291 " > ", kMaxUserPolicyLength, ")."));
1292 }
1293
1294 // Using `new` to access a non-public constructor.
1295 auto output = absl::WrapUnique(new Policy());
1296
1297 if (already_built_) {
1298 return absl::FailedPreconditionError("Can only build policy once.");
1299 }
1300
1301 if (use_namespaces_) {
1302 if (allow_unrestricted_networking_ && hostname_ != kDefaultHostname) {
1303 return absl::FailedPreconditionError(
1304 "Cannot set hostname without network namespaces.");
1305 }
1306 output->namespace_ =
1307 Namespace(allow_unrestricted_networking_, std::move(mounts_), hostname_,
1308 allow_mount_propagation_);
1309 }
1310
1311 output->collect_stacktrace_on_signal_ = collect_stacktrace_on_signal_;
1312 output->collect_stacktrace_on_violation_ = collect_stacktrace_on_violation_;
1313 output->collect_stacktrace_on_timeout_ = collect_stacktrace_on_timeout_;
1314 output->collect_stacktrace_on_kill_ = collect_stacktrace_on_kill_;
1315 output->collect_stacktrace_on_exit_ = collect_stacktrace_on_exit_;
1316 output->user_policy_ = std::move(user_policy_);
1317 if (default_action_) {
1318 output->user_policy_.push_back(*default_action_);
1319 }
1320 output->user_policy_.insert(output->user_policy_.end(),
1321 overridable_policy_.begin(),
1322 overridable_policy_.end());
1323 output->user_policy_handles_bpf_ = user_policy_handles_bpf_;
1324 output->user_policy_handles_ptrace_ = user_policy_handles_ptrace_;
1325
1326 PolicyBuilderDescription pb_description;
1327
1328 StoreDescription(&pb_description);
1329 output->policy_builder_description_ = pb_description;
1330 output->allowed_hosts_ = std::move(allowed_hosts_);
1331 already_built_ = true;
1332 return std::move(output);
1333 }
1334
AddFile(absl::string_view path,bool is_ro)1335 PolicyBuilder& PolicyBuilder::AddFile(absl::string_view path, bool is_ro) {
1336 return AddFileAt(path, path, is_ro);
1337 }
1338
AddFileAt(absl::string_view outside,absl::string_view inside,bool is_ro)1339 PolicyBuilder& PolicyBuilder::AddFileAt(absl::string_view outside,
1340 absl::string_view inside, bool is_ro) {
1341 EnableNamespaces(); // NOLINT(clang-diagnostic-deprecated-declarations)
1342 return AddFileAtIfNamespaced(outside, inside, is_ro);
1343 }
1344
AddFileIfNamespaced(absl::string_view path,bool is_ro)1345 PolicyBuilder& PolicyBuilder::AddFileIfNamespaced(absl::string_view path,
1346 bool is_ro) {
1347 return AddFileAtIfNamespaced(path, path, is_ro);
1348 }
1349
AddFileAtIfNamespaced(absl::string_view outside,absl::string_view inside,bool is_ro)1350 PolicyBuilder& PolicyBuilder::AddFileAtIfNamespaced(absl::string_view outside,
1351 absl::string_view inside,
1352 bool is_ro) {
1353 auto valid_outside = ValidateAbsolutePath(outside);
1354 if (!valid_outside.ok()) {
1355 SetError(valid_outside.status());
1356 return *this;
1357 }
1358
1359 if (absl::StartsWith(*valid_outside, "/proc/self") &&
1360 *valid_outside != "/proc/self/cpuset") {
1361 SetError(absl::InvalidArgumentError(
1362 absl::StrCat("Cannot add /proc/self mounts, you need to mount the "
1363 "whole /proc instead. You tried to mount ",
1364 outside)));
1365 return *this;
1366 }
1367
1368 if (!is_ro && IsOnReadOnlyDev(*valid_outside)) {
1369 SetError(absl::FailedPreconditionError(
1370 absl::StrCat("Cannot add ", outside,
1371 " as read-write as it's on a read-only device")));
1372 return *this;
1373 }
1374
1375 if (auto status = mounts_.AddFileAt(*valid_outside, inside, is_ro);
1376 !status.ok()) {
1377 SetError(
1378 absl::InternalError(absl::StrCat("Could not add file ", outside, " => ",
1379 inside, ": ", status.message())));
1380 }
1381 return *this;
1382 }
1383
AddLibrariesForBinary(absl::string_view path,absl::string_view ld_library_path)1384 PolicyBuilder& PolicyBuilder::AddLibrariesForBinary(
1385 absl::string_view path, absl::string_view ld_library_path) {
1386 EnableNamespaces(); // NOLINT(clang-diagnostic-deprecated-declarations)
1387
1388 auto valid_path = ValidatePath(path);
1389 if (!valid_path.ok()) {
1390 SetError(valid_path.status());
1391 return *this;
1392 }
1393
1394 if (auto status = mounts_.AddMappingsForBinary(*valid_path, ld_library_path);
1395 !status.ok()) {
1396 SetError(absl::InternalError(absl::StrCat(
1397 "Could not add libraries for ", *valid_path, ": ", status.message())));
1398 }
1399 return *this;
1400 }
1401
AddLibrariesForBinary(int fd,absl::string_view ld_library_path)1402 PolicyBuilder& PolicyBuilder::AddLibrariesForBinary(
1403 int fd, absl::string_view ld_library_path) {
1404 return AddLibrariesForBinary(absl::StrCat("/proc/self/fd/", fd),
1405 ld_library_path);
1406 }
1407
AddDirectory(absl::string_view path,bool is_ro)1408 PolicyBuilder& PolicyBuilder::AddDirectory(absl::string_view path, bool is_ro) {
1409 return AddDirectoryAt(path, path, is_ro);
1410 }
1411
AddDirectoryAt(absl::string_view outside,absl::string_view inside,bool is_ro)1412 PolicyBuilder& PolicyBuilder::AddDirectoryAt(absl::string_view outside,
1413 absl::string_view inside,
1414 bool is_ro) {
1415 EnableNamespaces(); // NOLINT(clang-diagnostic-deprecated-declarations)
1416 return AddDirectoryAtIfNamespaced(outside, inside, is_ro);
1417 }
1418
AddDirectoryIfNamespaced(absl::string_view path,bool is_ro)1419 PolicyBuilder& PolicyBuilder::AddDirectoryIfNamespaced(absl::string_view path,
1420 bool is_ro) {
1421 return AddDirectoryAtIfNamespaced(path, path, is_ro);
1422 }
1423
AddDirectoryAtIfNamespaced(absl::string_view outside,absl::string_view inside,bool is_ro)1424 PolicyBuilder& PolicyBuilder::AddDirectoryAtIfNamespaced(
1425 absl::string_view outside, absl::string_view inside, bool is_ro) {
1426 auto valid_outside = ValidateAbsolutePath(outside);
1427 if (!valid_outside.ok()) {
1428 SetError(valid_outside.status());
1429 return *this;
1430 }
1431
1432 if (absl::StartsWith(*valid_outside, "/proc/self")) {
1433 SetError(absl::InvalidArgumentError(
1434 absl::StrCat("Cannot add /proc/self mounts, you need to mount the "
1435 "whole /proc instead. You tried to mount ",
1436 outside)));
1437 return *this;
1438 }
1439
1440 if (!is_ro && IsOnReadOnlyDev(*valid_outside)) {
1441 SetError(absl::FailedPreconditionError(
1442 absl::StrCat("Cannot add ", outside,
1443 " as read-write as it's on a read-only device")));
1444 return *this;
1445 }
1446
1447 if (absl::Status status =
1448 mounts_.AddDirectoryAt(*valid_outside, inside, is_ro);
1449 !status.ok()) {
1450 SetError(absl::InternalError(absl::StrCat("Could not add directory ",
1451 outside, " => ", inside, ": ",
1452 status.message())));
1453 return *this;
1454 }
1455 return *this;
1456 }
1457
AddTmpfs(absl::string_view inside,size_t size)1458 PolicyBuilder& PolicyBuilder::AddTmpfs(absl::string_view inside, size_t size) {
1459 EnableNamespaces(); // NOLINT(clang-diagnostic-deprecated-declarations)
1460
1461 if (auto status = mounts_.AddTmpfs(inside, size); !status.ok()) {
1462 SetError(absl::InternalError(absl::StrCat("Could not mount tmpfs ", inside,
1463 ": ", status.message())));
1464 }
1465 return *this;
1466 }
1467
1468 // Use Allow(UnrestrictedNetworking()) instead.
AllowUnrestrictedNetworking()1469 PolicyBuilder& PolicyBuilder::AllowUnrestrictedNetworking() {
1470 return Allow(UnrestrictedNetworking());
1471 }
1472
SetHostname(absl::string_view hostname)1473 PolicyBuilder& PolicyBuilder::SetHostname(absl::string_view hostname) {
1474 EnableNamespaces(); // NOLINT(clang-diagnostic-deprecated-declarations)
1475 hostname_ = std::string(hostname);
1476
1477 return *this;
1478 }
1479
CollectStacktracesOnViolation(bool enable)1480 PolicyBuilder& PolicyBuilder::CollectStacktracesOnViolation(bool enable) {
1481 collect_stacktrace_on_violation_ = enable;
1482 return *this;
1483 }
1484
CollectStacktracesOnSignal(bool enable)1485 PolicyBuilder& PolicyBuilder::CollectStacktracesOnSignal(bool enable) {
1486 collect_stacktrace_on_signal_ = enable;
1487 return *this;
1488 }
1489
CollectStacktracesOnTimeout(bool enable)1490 PolicyBuilder& PolicyBuilder::CollectStacktracesOnTimeout(bool enable) {
1491 collect_stacktrace_on_timeout_ = enable;
1492 return *this;
1493 }
1494
CollectStacktracesOnKill(bool enable)1495 PolicyBuilder& PolicyBuilder::CollectStacktracesOnKill(bool enable) {
1496 collect_stacktrace_on_kill_ = enable;
1497 return *this;
1498 }
1499
CollectStacktracesOnExit(bool enable)1500 PolicyBuilder& PolicyBuilder::CollectStacktracesOnExit(bool enable) {
1501 collect_stacktrace_on_exit_ = enable;
1502 return *this;
1503 }
1504
AddNetworkProxyPolicy()1505 PolicyBuilder& PolicyBuilder::AddNetworkProxyPolicy() {
1506 if (allowed_hosts_) {
1507 SetError(absl::FailedPreconditionError(
1508 "AddNetworkProxyPolicy or AddNetworkProxyHandlerPolicy can be called "
1509 "at most once"));
1510 return *this;
1511 }
1512
1513 allowed_hosts_ = AllowedHosts();
1514
1515 AllowFutexOp(FUTEX_WAKE);
1516 AllowFutexOp(FUTEX_WAIT);
1517 AllowFutexOp(FUTEX_WAIT_BITSET);
1518 AllowDup();
1519 AllowSyscalls({
1520 __NR_recvmsg,
1521 __NR_close,
1522 __NR_gettid,
1523 });
1524 AddPolicyOnSyscall(__NR_socket, {
1525 ARG_32(0),
1526 JEQ32(AF_INET, ALLOW),
1527 JEQ32(AF_INET6, ALLOW),
1528 });
1529 AddPolicyOnSyscall(__NR_getsockopt,
1530 [](bpf_labels& labels) -> std::vector<sock_filter> {
1531 return {
1532 ARG_32(1),
1533 JNE32(SOL_SOCKET, JUMP(&labels, getsockopt_end)),
1534 ARG_32(2),
1535 JEQ32(SO_TYPE, ALLOW),
1536 LABEL(&labels, getsockopt_end),
1537 };
1538 });
1539 #ifdef SAPI_PPC64_LE
1540 AddPolicyOnSyscall(__NR_socketcall, {
1541 ARG_32(0),
1542 JEQ32(SYS_SOCKET, ALLOW),
1543 JEQ32(SYS_GETSOCKOPT, ALLOW),
1544 JEQ32(SYS_RECVMSG, ALLOW),
1545 });
1546 #endif
1547 return *this;
1548 }
1549
AddNetworkProxyHandlerPolicy()1550 PolicyBuilder& PolicyBuilder::AddNetworkProxyHandlerPolicy() {
1551 AddNetworkProxyPolicy();
1552 AllowSyscall(__NR_rt_sigreturn);
1553
1554 AddPolicyOnSyscall(__NR_rt_sigaction, {
1555 ARG_32(0),
1556 JEQ32(SIGSYS, ALLOW),
1557 });
1558
1559 AddPolicyOnSyscall(__NR_rt_sigprocmask, {
1560 ARG_32(0),
1561 JEQ32(SIG_UNBLOCK, ALLOW),
1562 });
1563
1564 AddPolicyOnSyscall(__NR_connect, {TRAP(0)});
1565 #ifdef SAPI_PPC64_LE
1566 AddPolicyOnSyscall(__NR_socketcall, {
1567 ARG_32(0),
1568 JEQ32(SYS_CONNECT, TRAP(0)),
1569 });
1570 #endif
1571 return *this;
1572 }
1573
TrapPtrace()1574 PolicyBuilder& PolicyBuilder::TrapPtrace() {
1575 if (handled_syscalls_.insert(__NR_ptrace).second) {
1576 user_policy_.insert(user_policy_.end(), {SYSCALL(__NR_ptrace, TRAP(0))});
1577 user_policy_handles_ptrace_ = true;
1578 }
1579 return *this;
1580 }
1581
SetRootWritable()1582 PolicyBuilder& PolicyBuilder::SetRootWritable() {
1583 EnableNamespaces(); // NOLINT(clang-diagnostic-deprecated-declarations)
1584 mounts_.SetRootWritable();
1585
1586 return *this;
1587 }
1588
StoreDescription(PolicyBuilderDescription * pb_description)1589 void PolicyBuilder::StoreDescription(PolicyBuilderDescription* pb_description) {
1590 for (const auto& handled_syscall : handled_syscalls_) {
1591 pb_description->add_handled_syscalls(handled_syscall);
1592 }
1593 }
1594
AllowIPv4(const std::string & ip_and_mask,uint32_t port)1595 PolicyBuilder& PolicyBuilder::AllowIPv4(const std::string& ip_and_mask,
1596 uint32_t port) {
1597 if (!allowed_hosts_) {
1598 SetError(absl::FailedPreconditionError(
1599 "AddNetworkProxyPolicy or AddNetworkProxyHandlerPolicy must be called "
1600 "before adding IP rules"));
1601 return *this;
1602 }
1603
1604 absl::Status status = allowed_hosts_->AllowIPv4(ip_and_mask, port);
1605 if (!status.ok()) {
1606 SetError(status);
1607 }
1608 return *this;
1609 }
1610
AllowIPv6(const std::string & ip_and_mask,uint32_t port)1611 PolicyBuilder& PolicyBuilder::AllowIPv6(const std::string& ip_and_mask,
1612 uint32_t port) {
1613 if (!allowed_hosts_) {
1614 SetError(absl::FailedPreconditionError(
1615 "AddNetworkProxyPolicy or AddNetworkProxyHandlerPolicy must be called "
1616 "before adding IP rules"));
1617 return *this;
1618 }
1619
1620 absl::Status status = allowed_hosts_->AllowIPv6(ip_and_mask, port);
1621 if (!status.ok()) {
1622 SetError(status);
1623 }
1624 return *this;
1625 }
1626
SetError(const absl::Status & status)1627 PolicyBuilder& PolicyBuilder::SetError(const absl::Status& status) {
1628 LOG(ERROR) << status;
1629 last_status_ = status;
1630 return *this;
1631 }
1632
1633 } // namespace sandbox2
1634