1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
4  *
5  * Test code for seccomp bpf.
6  */
7 
8 #define _GNU_SOURCE
9 #include <sys/types.h>
10 
11 /*
12  * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
13  * we need to use the kernel's siginfo.h file and trick glibc
14  * into accepting it.
15  */
16 #if defined(__GLIBC_PREREQ)
17 #if !__GLIBC_PREREQ(2, 26)
18 # include <asm/siginfo.h>
19 # define __have_siginfo_t 1
20 # define __have_sigval_t 1
21 # define __have_sigevent_t 1
22 #endif
23 #endif
24 
25 #include <errno.h>
26 #include <linux/filter.h>
27 #include <sys/prctl.h>
28 #include <sys/ptrace.h>
29 #include <sys/user.h>
30 #include <linux/prctl.h>
31 #include <linux/ptrace.h>
32 #include <linux/seccomp.h>
33 #include <pthread.h>
34 #include <semaphore.h>
35 #include <signal.h>
36 #include <stddef.h>
37 #include <stdbool.h>
38 #include <string.h>
39 #include <time.h>
40 #include <limits.h>
41 #include <linux/elf.h>
42 #include <sys/uio.h>
43 #include <sys/utsname.h>
44 #include <sys/fcntl.h>
45 #include <sys/mman.h>
46 #include <sys/times.h>
47 #include <sys/socket.h>
48 #include <sys/ioctl.h>
49 #include <linux/kcmp.h>
50 #include <sys/resource.h>
51 #include <sys/capability.h>
52 
53 #include <unistd.h>
54 #include <sys/syscall.h>
55 #include <poll.h>
56 
57 #include "../kselftest_harness.h"
58 #include "../clone3/clone3_selftests.h"
59 
60 /* Attempt to de-conflict with the selftests tree. */
61 #ifndef SKIP
62 #define SKIP(s, ...)	XFAIL(s, ##__VA_ARGS__)
63 #endif
64 
65 #define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
66 
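/*
 * The #ifndef fallbacks that follow let this test build against older UAPI
 * headers that predate the corresponding prctl/seccomp definitions.
 * Note: PR_SET_PTRACER's value 0x59616d61 is ASCII "Yama", the LSM that
 * consumes it.
 */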
67 #ifndef PR_SET_PTRACER
68 # define PR_SET_PTRACER 0x59616d61
69 #endif
70 
71 #ifndef PR_SET_NO_NEW_PRIVS
72 #define PR_SET_NO_NEW_PRIVS 38
73 #define PR_GET_NO_NEW_PRIVS 39
74 #endif
75 
76 #ifndef PR_SECCOMP_EXT
77 #define PR_SECCOMP_EXT 43
78 #endif
79 
80 #ifndef SECCOMP_EXT_ACT
81 #define SECCOMP_EXT_ACT 1
82 #endif
83 
84 #ifndef SECCOMP_EXT_ACT_TSYNC
85 #define SECCOMP_EXT_ACT_TSYNC 1
86 #endif
87 
88 #ifndef SECCOMP_MODE_STRICT
89 #define SECCOMP_MODE_STRICT 1
90 #endif
91 
92 #ifndef SECCOMP_MODE_FILTER
93 #define SECCOMP_MODE_FILTER 2
94 #endif
95 
96 #ifndef SECCOMP_RET_ALLOW
97 struct seccomp_data {
98 	int nr;
99 	__u32 arch;
100 	__u64 instruction_pointer;
101 	__u64 args[6];
102 };
103 #endif
104 
105 #ifndef SECCOMP_RET_KILL_PROCESS
106 #define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
107 #define SECCOMP_RET_KILL_THREAD	 0x00000000U /* kill the thread */
108 #endif
109 #ifndef SECCOMP_RET_KILL
110 #define SECCOMP_RET_KILL	 SECCOMP_RET_KILL_THREAD
111 #define SECCOMP_RET_TRAP	 0x00030000U /* disallow and force a SIGSYS */
112 #define SECCOMP_RET_ERRNO	 0x00050000U /* returns an errno */
113 #define SECCOMP_RET_TRACE	 0x7ff00000U /* pass to a tracer or disallow */
114 #define SECCOMP_RET_ALLOW	 0x7fff0000U /* allow */
115 #endif
116 #ifndef SECCOMP_RET_LOG
117 #define SECCOMP_RET_LOG		 0x7ffc0000U /* allow after logging */
118 #endif
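/*
 * For all SECCOMP_RET_* actions the high bits select the action and the
 * low 16 bits (SECCOMP_RET_DATA) carry auxiliary data, e.g. the errno
 * value for SECCOMP_RET_ERRNO or the message handed to a tracer for
 * SECCOMP_RET_TRACE.
 */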
119 
120 #ifndef __NR_seccomp
121 # if defined(__i386__)
122 #  define __NR_seccomp 354
123 # elif defined(__x86_64__)
124 #  define __NR_seccomp 317
125 # elif defined(__arm__)
126 #  define __NR_seccomp 383
127 # elif defined(__aarch64__)
128 #  define __NR_seccomp 277
129 # elif defined(__riscv)
130 #  define __NR_seccomp 277
131 # elif defined(__csky__)
132 #  define __NR_seccomp 277
133 # elif defined(__hppa__)
134 #  define __NR_seccomp 338
135 # elif defined(__powerpc__)
136 #  define __NR_seccomp 358
137 # elif defined(__s390__)
138 #  define __NR_seccomp 348
139 # elif defined(__xtensa__)
140 #  define __NR_seccomp 337
141 # elif defined(__sh__)
142 #  define __NR_seccomp 372
143 # else
144 #  warning "seccomp syscall number unknown for this architecture"
145 #  define __NR_seccomp 0xffff
146 # endif
147 #endif
148 
149 #ifndef SECCOMP_SET_MODE_STRICT
150 #define SECCOMP_SET_MODE_STRICT 0
151 #endif
152 
153 #ifndef SECCOMP_SET_MODE_FILTER
154 #define SECCOMP_SET_MODE_FILTER 1
155 #endif
156 
157 #ifndef SECCOMP_GET_ACTION_AVAIL
158 #define SECCOMP_GET_ACTION_AVAIL 2
159 #endif
160 
161 #ifndef SECCOMP_GET_NOTIF_SIZES
162 #define SECCOMP_GET_NOTIF_SIZES 3
163 #endif
164 
165 #ifndef SECCOMP_FILTER_FLAG_TSYNC
166 #define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
167 #endif
168 
169 #ifndef SECCOMP_FILTER_FLAG_LOG
170 #define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
171 #endif
172 
173 #ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
174 #define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
175 #endif
176 
177 #ifndef PTRACE_SECCOMP_GET_METADATA
178 #define PTRACE_SECCOMP_GET_METADATA	0x420d
179 
180 struct seccomp_metadata {
181 	__u64 filter_off;       /* Input: which filter */
182 	__u64 flags;             /* Output: filter's flags */
183 };
184 #endif
185 
186 #ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER
187 #define SECCOMP_FILTER_FLAG_NEW_LISTENER	(1UL << 3)
188 #endif
189 
190 #ifndef SECCOMP_RET_USER_NOTIF
191 #define SECCOMP_RET_USER_NOTIF 0x7fc00000U
192 
193 #define SECCOMP_IOC_MAGIC		'!'
194 #define SECCOMP_IO(nr)			_IO(SECCOMP_IOC_MAGIC, nr)
195 #define SECCOMP_IOR(nr, type)		_IOR(SECCOMP_IOC_MAGIC, nr, type)
196 #define SECCOMP_IOW(nr, type)		_IOW(SECCOMP_IOC_MAGIC, nr, type)
197 #define SECCOMP_IOWR(nr, type)		_IOWR(SECCOMP_IOC_MAGIC, nr, type)
198 
199 /* Flags for seccomp notification fd ioctl. */
200 #define SECCOMP_IOCTL_NOTIF_RECV	SECCOMP_IOWR(0, struct seccomp_notif)
201 #define SECCOMP_IOCTL_NOTIF_SEND	SECCOMP_IOWR(1,	\
202 						struct seccomp_notif_resp)
203 #define SECCOMP_IOCTL_NOTIF_ID_VALID	SECCOMP_IOW(2, __u64)
204 
205 struct seccomp_notif {
206 	__u64 id;
207 	__u32 pid;
208 	__u32 flags;
209 	struct seccomp_data data;
210 };
211 
212 struct seccomp_notif_resp {
213 	__u64 id;
214 	__s64 val;
215 	__s32 error;
216 	__u32 flags;
217 };
218 
219 struct seccomp_notif_sizes {
220 	__u16 seccomp_notif;
221 	__u16 seccomp_notif_resp;
222 	__u16 seccomp_data;
223 };
224 #endif
225 
226 #ifndef SECCOMP_IOCTL_NOTIF_ADDFD
227 /* On success, the return value is the remote process's added fd number */
228 #define SECCOMP_IOCTL_NOTIF_ADDFD	SECCOMP_IOW(3,	\
229 						struct seccomp_notif_addfd)
230 
231 /* valid flags for seccomp_notif_addfd */
232 #define SECCOMP_ADDFD_FLAG_SETFD	(1UL << 0) /* Specify remote fd */
233 
234 struct seccomp_notif_addfd {
235 	__u64 id;
236 	__u32 flags;
237 	__u32 srcfd;
238 	__u32 newfd;
239 	__u32 newfd_flags;
240 };
241 #endif
242 
243 #ifndef SECCOMP_ADDFD_FLAG_SEND
244 #define SECCOMP_ADDFD_FLAG_SEND	(1UL << 1) /* Addfd and return it, atomically */
245 #endif
246 
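/*
 * Deliberately mis-sized variants of struct seccomp_notif_addfd, used later
 * to probe how the ADDFD ioctl handles smaller and larger argument structs.
 */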
247 struct seccomp_notif_addfd_small {
248 	__u64 id;
249 	char weird[4];
250 };
251 #define SECCOMP_IOCTL_NOTIF_ADDFD_SMALL	\
252 	SECCOMP_IOW(3, struct seccomp_notif_addfd_small)
253 
254 struct seccomp_notif_addfd_big {
255 	union {
256 		struct seccomp_notif_addfd addfd;
257 		char buf[sizeof(struct seccomp_notif_addfd) + 8];
258 	};
259 };
260 #define SECCOMP_IOCTL_NOTIF_ADDFD_BIG	\
261 	SECCOMP_IOWR(3, struct seccomp_notif_addfd_big)
262 
263 #ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY
264 #define PTRACE_EVENTMSG_SYSCALL_ENTRY	1
265 #define PTRACE_EVENTMSG_SYSCALL_EXIT	2
266 #endif
267 
268 #ifndef SECCOMP_USER_NOTIF_FLAG_CONTINUE
269 #define SECCOMP_USER_NOTIF_FLAG_CONTINUE 0x00000001
270 #endif
271 
272 #ifndef SECCOMP_FILTER_FLAG_TSYNC_ESRCH
273 #define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
274 #endif
275 
276 #ifndef SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV
277 #define SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV (1UL << 5)
278 #endif
279 
280 #ifndef seccomp
281 int seccomp(unsigned int op, unsigned int flags, void *args)
282 {
283 	errno = 0;
284 	return syscall(__NR_seccomp, op, flags, args);
285 }
286 #endif
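/*
 * Minimal usage sketch for the wrapper above (mirrors how the tests call it
 * later), assuming `prog` is a populated struct sock_fprog and no-new-privs
 * has already been set:
 *
 *	if (seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog))
 *		perror("seccomp");
 */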
287 
288 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
289 #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
290 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
291 #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
292 #else
293 #error "wut? Unknown __BYTE_ORDER__?!"
294 #endif
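/*
 * Why the endian split above: seccomp_data.args[] entries are 64-bit, but
 * classic BPF loads only 32 bits at a time (BPF_W). On little-endian the low
 * word of each argument sits at the start of the u64; on big-endian it sits
 * sizeof(__u32) bytes in, hence the extra offset.
 */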
295 
296 #define SIBLING_EXIT_UNKILLED	0xbadbeef
297 #define SIBLING_EXIT_FAILURE	0xbadface
298 #define SIBLING_EXIT_NEWPRIVS	0xbadfeed
299 
300 static int __filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2)
301 {
302 #ifdef __NR_kcmp
303 	errno = 0;
304 	return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2);
305 #else
306 	errno = ENOSYS;
307 	return -1;
308 #endif
309 }
310 
311 /* Have TH_LOG report the actual location where filecmp() is used. */
312 #define filecmp(pid1, pid2, fd1, fd2)	({		\
313 	int _ret;					\
314 							\
315 	_ret = __filecmp(pid1, pid2, fd1, fd2);		\
316 	if (_ret != 0) {				\
317 		if (_ret < 0 && errno == ENOSYS) {	\
318 			TH_LOG("kcmp() syscall missing (test is less accurate)");\
319 			_ret = 0;			\
320 		}					\
321 	}						\
322 	_ret; })
323 
324 TEST(kcmp)
325 {
326 	int ret;
327 
328 	ret = __filecmp(getpid(), getpid(), 1, 1);
329 	EXPECT_EQ(ret, 0);
330 	if (ret != 0 && errno == ENOSYS)
331 		SKIP(return, "Kernel does not support kcmp() (missing CONFIG_KCMP?)");
332 }
333 
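/*
 * Strict-mode tests: once SECCOMP_MODE_STRICT is enabled only read, write,
 * _exit and sigreturn are permitted, so the tests below terminate via a raw
 * __NR_exit and expect SIGKILL on anything else.
 */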
334 TEST(mode_strict_support)
335 {
336 	long ret;
337 
338 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
339 	ASSERT_EQ(0, ret) {
340 		TH_LOG("Kernel does not support CONFIG_SECCOMP");
341 	}
342 	syscall(__NR_exit, 0);
343 }
344 
345 TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
346 {
347 	long ret;
348 
349 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
350 	ASSERT_EQ(0, ret) {
351 		TH_LOG("Kernel does not support CONFIG_SECCOMP");
352 	}
353 	syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
354 		NULL, NULL, NULL);
355 	EXPECT_FALSE(true) {
356 		TH_LOG("Unreachable!");
357 	}
358 }
359 
360 /* Note! This doesn't test no new privs behavior */
361 TEST(no_new_privs_support)
362 {
363 	long ret;
364 
365 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
366 	EXPECT_EQ(0, ret) {
367 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
368 	}
369 }
370 
371 /* Tests kernel support by checking for a copy_from_user() fault on NULL. */
372 TEST(mode_filter_support)
373 {
374 	long ret;
375 
376 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
377 	ASSERT_EQ(0, ret) {
378 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
379 	}
380 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
381 	EXPECT_EQ(-1, ret);
382 	EXPECT_EQ(EFAULT, errno) {
383 		TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
384 	}
385 }
386 
387 TEST(mode_filter_without_nnp)
388 {
389 	struct sock_filter filter[] = {
390 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
391 	};
392 	struct sock_fprog prog = {
393 		.len = (unsigned short)ARRAY_SIZE(filter),
394 		.filter = filter,
395 	};
396 	long ret;
397 
398 	ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
399 	ASSERT_LE(0, ret) {
400 		TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
401 	}
402 	errno = 0;
403 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
404 	/* Succeeds with CAP_SYS_ADMIN, fails without */
405 	/* TODO(wad) check caps not euid */
406 	if (geteuid()) {
407 		EXPECT_EQ(-1, ret);
408 		EXPECT_EQ(EACCES, errno);
409 	} else {
410 		EXPECT_EQ(0, ret);
411 	}
412 }
413 
414 #define MAX_INSNS_PER_PATH 32768
415 
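/*
 * Filter size tests: a single filter may hold at most BPF_MAXINSNS (4096)
 * instructions, and the kernel also caps the cumulative instruction count
 * across all attached filters (each attached filter is charged a small fixed
 * overhead, visible in the "penalties" math logged below).
 */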
416 TEST(filter_size_limits)
417 {
418 	int i;
419 	int count = BPF_MAXINSNS + 1;
420 	struct sock_filter allow[] = {
421 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
422 	};
423 	struct sock_filter *filter;
424 	struct sock_fprog prog = { };
425 	long ret;
426 
427 	filter = calloc(count, sizeof(*filter));
428 	ASSERT_NE(NULL, filter);
429 
430 	for (i = 0; i < count; i++)
431 		filter[i] = allow[0];
432 
433 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
434 	ASSERT_EQ(0, ret);
435 
436 	prog.filter = filter;
437 	prog.len = count;
438 
439 	/* Too many filter instructions in a single filter. */
440 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
441 	ASSERT_NE(0, ret) {
442 		TH_LOG("Installing %d insn filter was allowed", prog.len);
443 	}
444 
445 	/* One less is okay, though. */
446 	prog.len -= 1;
447 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
448 	ASSERT_EQ(0, ret) {
449 		TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
450 	}
451 }
452 
453 TEST(filter_chain_limits)
454 {
455 	int i;
456 	int count = BPF_MAXINSNS;
457 	struct sock_filter allow[] = {
458 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
459 	};
460 	struct sock_filter *filter;
461 	struct sock_fprog prog = { };
462 	long ret;
463 
464 	filter = calloc(count, sizeof(*filter));
465 	ASSERT_NE(NULL, filter);
466 
467 	for (i = 0; i < count; i++)
468 		filter[i] = allow[0];
469 
470 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
471 	ASSERT_EQ(0, ret);
472 
473 	prog.filter = filter;
474 	prog.len = 1;
475 
476 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
477 	ASSERT_EQ(0, ret);
478 
479 	prog.len = count;
480 
481 	/* Too many total filter instructions. */
482 	for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
483 		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
484 		if (ret != 0)
485 			break;
486 	}
487 	ASSERT_NE(0, ret) {
488 		TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
489 		       i, count, i * (count + 4));
490 	}
491 }
492 
493 TEST(mode_filter_cannot_move_to_strict)
494 {
495 	struct sock_filter filter[] = {
496 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
497 	};
498 	struct sock_fprog prog = {
499 		.len = (unsigned short)ARRAY_SIZE(filter),
500 		.filter = filter,
501 	};
502 	long ret;
503 
504 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
505 	ASSERT_EQ(0, ret);
506 
507 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
508 	ASSERT_EQ(0, ret);
509 
510 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
511 	EXPECT_EQ(-1, ret);
512 	EXPECT_EQ(EINVAL, errno);
513 }
514 
515 
516 TEST(mode_filter_get_seccomp)
517 {
518 	struct sock_filter filter[] = {
519 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
520 	};
521 	struct sock_fprog prog = {
522 		.len = (unsigned short)ARRAY_SIZE(filter),
523 		.filter = filter,
524 	};
525 	long ret;
526 
527 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
528 	ASSERT_EQ(0, ret);
529 
530 	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
531 	EXPECT_EQ(0, ret);
532 
533 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
534 	ASSERT_EQ(0, ret);
535 
536 	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
537 	EXPECT_EQ(2, ret);
538 }
539 
540 
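/*
 * The simplest valid program: a single SECCOMP_RET_ALLOW lets every syscall
 * through; an empty program, by contrast, must be rejected with EINVAL.
 */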
541 TEST(ALLOW_all)
542 {
543 	struct sock_filter filter[] = {
544 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
545 	};
546 	struct sock_fprog prog = {
547 		.len = (unsigned short)ARRAY_SIZE(filter),
548 		.filter = filter,
549 	};
550 	long ret;
551 
552 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
553 	ASSERT_EQ(0, ret);
554 
555 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
556 	ASSERT_EQ(0, ret);
557 }
558 
559 TEST(empty_prog)
560 {
561 	struct sock_filter filter[] = {
562 	};
563 	struct sock_fprog prog = {
564 		.len = (unsigned short)ARRAY_SIZE(filter),
565 		.filter = filter,
566 	};
567 	long ret;
568 
569 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
570 	ASSERT_EQ(0, ret);
571 
572 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
573 	EXPECT_EQ(-1, ret);
574 	EXPECT_EQ(EINVAL, errno);
575 }
576 
577 TEST(log_all)
578 {
579 	struct sock_filter filter[] = {
580 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
581 	};
582 	struct sock_fprog prog = {
583 		.len = (unsigned short)ARRAY_SIZE(filter),
584 		.filter = filter,
585 	};
586 	long ret;
587 	pid_t parent = getppid();
588 
589 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
590 	ASSERT_EQ(0, ret);
591 
592 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
593 	ASSERT_EQ(0, ret);
594 
595 	/* getppid() should succeed and be logged (no check for logging) */
596 	EXPECT_EQ(parent, syscall(__NR_getppid));
597 }
598 
599 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
600 {
601 	struct sock_filter filter[] = {
602 		BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
603 	};
604 	struct sock_fprog prog = {
605 		.len = (unsigned short)ARRAY_SIZE(filter),
606 		.filter = filter,
607 	};
608 	long ret;
609 
610 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
611 	ASSERT_EQ(0, ret);
612 
613 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
614 	ASSERT_EQ(0, ret);
615 	EXPECT_EQ(0, syscall(__NR_getpid)) {
616 		TH_LOG("getpid() shouldn't ever return");
617 	}
618 }
619 
620 /* return code >= 0x80000000 is unused. */
621 TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
622 {
623 	struct sock_filter filter[] = {
624 		BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
625 	};
626 	struct sock_fprog prog = {
627 		.len = (unsigned short)ARRAY_SIZE(filter),
628 		.filter = filter,
629 	};
630 	long ret;
631 
632 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
633 	ASSERT_EQ(0, ret);
634 
635 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
636 	ASSERT_EQ(0, ret);
637 	EXPECT_EQ(0, syscall(__NR_getpid)) {
638 		TH_LOG("getpid() shouldn't ever return");
639 	}
640 }
641 
642 TEST_SIGNAL(KILL_all, SIGSYS)
643 {
644 	struct sock_filter filter[] = {
645 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
646 	};
647 	struct sock_fprog prog = {
648 		.len = (unsigned short)ARRAY_SIZE(filter),
649 		.filter = filter,
650 	};
651 	long ret;
652 
653 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
654 	ASSERT_EQ(0, ret);
655 
656 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
657 	ASSERT_EQ(0, ret);
658 }
659 
660 TEST_SIGNAL(KILL_one, SIGSYS)
661 {
662 	struct sock_filter filter[] = {
663 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
664 			offsetof(struct seccomp_data, nr)),
665 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
666 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
667 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
668 	};
669 	struct sock_fprog prog = {
670 		.len = (unsigned short)ARRAY_SIZE(filter),
671 		.filter = filter,
672 	};
673 	long ret;
674 	pid_t parent = getppid();
675 
676 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
677 	ASSERT_EQ(0, ret);
678 
679 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
680 	ASSERT_EQ(0, ret);
681 
682 	EXPECT_EQ(parent, syscall(__NR_getppid));
683 	/* getpid() should never return. */
684 	EXPECT_EQ(0, syscall(__NR_getpid));
685 }
686 
687 TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
688 {
689 	void *fatal_address;
690 	struct sock_filter filter[] = {
691 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
692 			offsetof(struct seccomp_data, nr)),
693 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0),
694 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
695 		/* Only bother with the lower 32 bits for now. */
696 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
697 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K,
698 			(unsigned long)&fatal_address, 0, 1),
699 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
700 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
701 	};
702 	struct sock_fprog prog = {
703 		.len = (unsigned short)ARRAY_SIZE(filter),
704 		.filter = filter,
705 	};
706 	long ret;
707 	pid_t parent = getppid();
708 	struct tms timebuf;
709 	clock_t clock = times(&timebuf);
710 
711 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
712 	ASSERT_EQ(0, ret);
713 
714 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
715 	ASSERT_EQ(0, ret);
716 
717 	EXPECT_EQ(parent, syscall(__NR_getppid));
718 	EXPECT_LE(clock, syscall(__NR_times, &timebuf));
719 	/* times() should never return. */
720 	EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
721 }
722 
723 TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
724 {
725 #ifndef __NR_mmap2
726 	int sysno = __NR_mmap;
727 #else
728 	int sysno = __NR_mmap2;
729 #endif
730 	struct sock_filter filter[] = {
731 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
732 			offsetof(struct seccomp_data, nr)),
733 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
734 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
735 		/* Only bother with the lower 32 bits for now. */
736 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
737 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
738 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
739 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
740 	};
741 	struct sock_fprog prog = {
742 		.len = (unsigned short)ARRAY_SIZE(filter),
743 		.filter = filter,
744 	};
745 	long ret;
746 	pid_t parent = getppid();
747 	int fd;
748 	void *map1, *map2;
749 	int page_size = sysconf(_SC_PAGESIZE);
750 
751 	ASSERT_LT(0, page_size);
752 
753 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
754 	ASSERT_EQ(0, ret);
755 
756 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
757 	ASSERT_EQ(0, ret);
758 
759 	fd = open("/dev/zero", O_RDONLY);
760 	ASSERT_NE(-1, fd);
761 
762 	EXPECT_EQ(parent, syscall(__NR_getppid));
763 	map1 = (void *)syscall(sysno,
764 		NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
765 	EXPECT_NE(MAP_FAILED, map1);
766 	/* mmap2() should never return. */
767 	map2 = (void *)syscall(sysno,
768 		 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
769 	EXPECT_EQ(MAP_FAILED, map2);
770 
771 	/* If we got here, the test failed, so clean up the resources. */
772 	munmap(map1, page_size);
773 	munmap(map2, page_size);
774 	close(fd);
775 }
776 
777 /* Thread worker that, when asked, dies via a seccomp filter violation. */
778 void *kill_thread(void *data)
779 {
780 	bool die = (bool)data;
781 
782 	if (die) {
783 		prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
784 		return (void *)SIBLING_EXIT_FAILURE;
785 	}
786 
787 	return (void *)SIBLING_EXIT_UNKILLED;
788 }
789 
790 enum kill_t {
791 	KILL_THREAD,
792 	KILL_PROCESS,
793 	RET_UNKNOWN
794 };
795 
796 /* Prepare a thread that will kill itself or both of us. */
797 void kill_thread_or_group(struct __test_metadata *_metadata,
798 			  enum kill_t kill_how)
799 {
800 	pthread_t thread;
801 	void *status;
802 	/* Kill only when calling __NR_prctl. */
803 	struct sock_filter filter_thread[] = {
804 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
805 			offsetof(struct seccomp_data, nr)),
806 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
807 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
808 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
809 	};
810 	struct sock_fprog prog_thread = {
811 		.len = (unsigned short)ARRAY_SIZE(filter_thread),
812 		.filter = filter_thread,
813 	};
814 	int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAA;
815 	struct sock_filter filter_process[] = {
816 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
817 			offsetof(struct seccomp_data, nr)),
818 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
819 		BPF_STMT(BPF_RET|BPF_K, kill),
820 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
821 	};
822 	struct sock_fprog prog_process = {
823 		.len = (unsigned short)ARRAY_SIZE(filter_process),
824 		.filter = filter_process,
825 	};
826 
827 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
828 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
829 	}
830 
831 	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
832 			     kill_how == KILL_THREAD ? &prog_thread
833 						     : &prog_process));
834 
835 	/*
836 	 * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
837 	 * action cannot be downgraded by a later filter.
838 	 */
839 	if (kill_how == KILL_PROCESS)
840 		ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
841 
842 	/* Start a thread that will exit immediately. */
843 	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
844 	ASSERT_EQ(0, pthread_join(thread, &status));
845 	ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status);
846 
847 	/* Start a thread that will die immediately. */
848 	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true));
849 	ASSERT_EQ(0, pthread_join(thread, &status));
850 	ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status);
851 
852 	/*
853 	 * If we get here, only the spawned thread died. Let the parent know
854 	 * the whole process didn't die (i.e. this thread, the spawner,
855 	 * stayed running).
856 	 */
857 	exit(42);
858 }
859 
860 /*
861  * b/147676645
862  * SECCOMP_FILTER_FLAG_TSYNC_ESRCH not compatible < 5.7
863  * SECCOMP_FILTER_FLAG_NEW_LISTENER not compatible < 5.0
864  * SECCOMP_FILTER_FLAG_SPEC_ALLOW not compatible < 4.17
865  * SECCOMP_FILTER_FLAG_LOG not compatible < 4.14
866  */
867 #ifndef __ANDROID__
868 TEST(KILL_thread)
869 {
870 	int status;
871 	pid_t child_pid;
872 
873 	child_pid = fork();
874 	ASSERT_LE(0, child_pid);
875 	if (child_pid == 0) {
876 		kill_thread_or_group(_metadata, KILL_THREAD);
877 		_exit(38);
878 	}
879 
880 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
881 
882 	/* If only the thread was killed, we'll see exit 42. */
883 	ASSERT_TRUE(WIFEXITED(status));
884 	ASSERT_EQ(42, WEXITSTATUS(status));
885 }
886 #endif
887 
888 TEST(KILL_process)
889 {
890 	int status;
891 	pid_t child_pid;
892 
893 	child_pid = fork();
894 	ASSERT_LE(0, child_pid);
895 	if (child_pid == 0) {
896 		kill_thread_or_group(_metadata, KILL_PROCESS);
897 		_exit(38);
898 	}
899 
900 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
901 
902 	/* If the entire process was killed, we'll see SIGSYS. */
903 	ASSERT_TRUE(WIFSIGNALED(status));
904 	ASSERT_EQ(SIGSYS, WTERMSIG(status));
905 }
906 
907 TEST(KILL_unknown)
908 {
909 	int status;
910 	pid_t child_pid;
911 
912 	child_pid = fork();
913 	ASSERT_LE(0, child_pid);
914 	if (child_pid == 0) {
915 		kill_thread_or_group(_metadata, RET_UNKNOWN);
916 		_exit(38);
917 	}
918 
919 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
920 
921 	/* If the entire process was killed, we'll see SIGSYS. */
922 	EXPECT_TRUE(WIFSIGNALED(status)) {
923 		TH_LOG("Unknown SECCOMP_RET is only killing the thread?");
924 	}
925 	ASSERT_EQ(SIGSYS, WTERMSIG(status));
926 }
927 
928 /* TODO(wad) add 64-bit versus 32-bit arg tests. */
929 TEST(arg_out_of_range)
930 {
931 	struct sock_filter filter[] = {
932 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
933 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
934 	};
935 	struct sock_fprog prog = {
936 		.len = (unsigned short)ARRAY_SIZE(filter),
937 		.filter = filter,
938 	};
939 	long ret;
940 
941 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
942 	ASSERT_EQ(0, ret);
943 
944 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
945 	EXPECT_EQ(-1, ret);
946 	EXPECT_EQ(EINVAL, errno);
947 }
948 
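/*
 * ERRNO_FILTER(name, errno) declares a filter that returns
 * SECCOMP_RET_ERRNO | errno for read(2) and allows every other syscall,
 * plus a matching sock_fprog named prog_<name> ready to be installed.
 */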
949 #define ERRNO_FILTER(name, errno)					\
950 	struct sock_filter _read_filter_##name[] = {			\
951 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,				\
952 			offsetof(struct seccomp_data, nr)),		\
953 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),	\
954 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | errno),	\
955 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),		\
956 	};								\
957 	struct sock_fprog prog_##name = {				\
958 		.len = (unsigned short)ARRAY_SIZE(_read_filter_##name),	\
959 		.filter = _read_filter_##name,				\
960 	}
961 
962 /* Make sure basic errno values are correctly passed through a filter. */
963 TEST(ERRNO_valid)
964 {
965 	ERRNO_FILTER(valid, E2BIG);
966 	long ret;
967 	pid_t parent = getppid();
968 
969 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
970 	ASSERT_EQ(0, ret);
971 
972 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid);
973 	ASSERT_EQ(0, ret);
974 
975 	EXPECT_EQ(parent, syscall(__NR_getppid));
976 	EXPECT_EQ(-1, read(-1, NULL, 0));
977 	EXPECT_EQ(E2BIG, errno);
978 }
979 
980 /* Make sure an errno of zero is correctly handled by the arch code. */
981 TEST(ERRNO_zero)
982 {
983 	ERRNO_FILTER(zero, 0);
984 	long ret;
985 	pid_t parent = getppid();
986 
987 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
988 	ASSERT_EQ(0, ret);
989 
990 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero);
991 	ASSERT_EQ(0, ret);
992 
993 	EXPECT_EQ(parent, syscall(__NR_getppid));
994 	/* "errno" of 0 is ok. */
995 	EXPECT_EQ(0, read(-1, NULL, 0));
996 }
997 
998 /*
999  * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller.
1000  * This tests that the errno value gets capped correctly, fixed by
1001  * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO").
1002  */
1003 TEST(ERRNO_capped)
1004 {
1005 	ERRNO_FILTER(capped, 4096);
1006 	long ret;
1007 	pid_t parent = getppid();
1008 
1009 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1010 	ASSERT_EQ(0, ret);
1011 
1012 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped);
1013 	ASSERT_EQ(0, ret);
1014 
1015 	EXPECT_EQ(parent, syscall(__NR_getppid));
1016 	EXPECT_EQ(-1, read(-1, NULL, 0));
1017 	EXPECT_EQ(4095, errno);
1018 }
1019 
1020 /*
1021  * Filters are processed in reverse order: last applied is executed first.
1022  * Since only the SECCOMP_RET_ACTION mask is tested for return values, the
1023  * SECCOMP_RET_DATA mask results will follow the most recently applied
1024  * matching filter return (and not the lowest or highest value).
1025  */
1026 TEST(ERRNO_order)
1027 {
1028 	ERRNO_FILTER(first,  11);
1029 	ERRNO_FILTER(second, 13);
1030 	ERRNO_FILTER(third,  12);
1031 	long ret;
1032 	pid_t parent = getppid();
1033 
1034 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1035 	ASSERT_EQ(0, ret);
1036 
1037 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first);
1038 	ASSERT_EQ(0, ret);
1039 
1040 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second);
1041 	ASSERT_EQ(0, ret);
1042 
1043 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third);
1044 	ASSERT_EQ(0, ret);
1045 
1046 	EXPECT_EQ(parent, syscall(__NR_getppid));
1047 	EXPECT_EQ(-1, read(-1, NULL, 0));
1048 	EXPECT_EQ(12, errno);
1049 }
1050 
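/*
 * TRAP fixture: the installed filter returns SECCOMP_RET_TRAP for getpid(),
 * which delivers a SIGSYS whose siginfo carries the syscall number, arch
 * and calling address checked in the handler test below.
 */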
1051 FIXTURE(TRAP) {
1052 	struct sock_fprog prog;
1053 };
1054 
1055 FIXTURE_SETUP(TRAP)
1056 {
1057 	struct sock_filter filter[] = {
1058 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1059 			offsetof(struct seccomp_data, nr)),
1060 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
1061 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
1062 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1063 	};
1064 
1065 	memset(&self->prog, 0, sizeof(self->prog));
1066 	self->prog.filter = malloc(sizeof(filter));
1067 	ASSERT_NE(NULL, self->prog.filter);
1068 	memcpy(self->prog.filter, filter, sizeof(filter));
1069 	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1070 }
1071 
1072 FIXTURE_TEARDOWN(TRAP)
1073 {
1074 	if (self->prog.filter)
1075 		free(self->prog.filter);
1076 }
1077 
1078 TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
1079 {
1080 	long ret;
1081 
1082 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1083 	ASSERT_EQ(0, ret);
1084 
1085 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
1086 	ASSERT_EQ(0, ret);
1087 	syscall(__NR_getpid);
1088 }
1089 
1090 /* Ensure that SIGSYS overrides SIG_IGN */
1091 TEST_F_SIGNAL(TRAP, ign, SIGSYS)
1092 {
1093 	long ret;
1094 
1095 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1096 	ASSERT_EQ(0, ret);
1097 
1098 	signal(SIGSYS, SIG_IGN);
1099 
1100 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
1101 	ASSERT_EQ(0, ret);
1102 	syscall(__NR_getpid);
1103 }
1104 
1105 static siginfo_t TRAP_info;
1106 static volatile int TRAP_nr;
1107 static void TRAP_action(int nr, siginfo_t *info, void *void_context)
1108 {
1109 	memcpy(&TRAP_info, info, sizeof(TRAP_info));
1110 	TRAP_nr = nr;
1111 }
1112 
1113 TEST_F(TRAP, handler)
1114 {
1115 	int ret, test;
1116 	struct sigaction act;
1117 	sigset_t mask;
1118 
1119 	memset(&act, 0, sizeof(act));
1120 	sigemptyset(&mask);
1121 	sigaddset(&mask, SIGSYS);
1122 
1123 	act.sa_sigaction = &TRAP_action;
1124 	act.sa_flags = SA_SIGINFO;
1125 	ret = sigaction(SIGSYS, &act, NULL);
1126 	ASSERT_EQ(0, ret) {
1127 		TH_LOG("sigaction failed");
1128 	}
1129 	ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
1130 	ASSERT_EQ(0, ret) {
1131 		TH_LOG("sigprocmask failed");
1132 	}
1133 
1134 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1135 	ASSERT_EQ(0, ret);
1136 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
1137 	ASSERT_EQ(0, ret);
1138 	TRAP_nr = 0;
1139 	memset(&TRAP_info, 0, sizeof(TRAP_info));
1140 	/* Expect the registers to be rolled back. (nr = error) may vary
1141 	 * based on arch. */
1142 	ret = syscall(__NR_getpid);
1143 	/* Silence gcc warning about volatile. */
1144 	test = TRAP_nr;
1145 	EXPECT_EQ(SIGSYS, test);
1146 	struct local_sigsys {
1147 		void *_call_addr;	/* calling user insn */
1148 		int _syscall;		/* triggering system call number */
1149 		unsigned int _arch;	/* AUDIT_ARCH_* of syscall */
1150 	} *sigsys = (struct local_sigsys *)
1151 #ifdef si_syscall
1152 		&(TRAP_info.si_call_addr);
1153 #else
1154 		&TRAP_info.si_pid;
1155 #endif
1156 	EXPECT_EQ(__NR_getpid, sigsys->_syscall);
1157 	/* Make sure arch is non-zero. */
1158 	EXPECT_NE(0, sigsys->_arch);
1159 	EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
1160 }
1161 
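/*
 * precedence fixture: one filter per action, each returning its action only
 * for getpid() and SECCOMP_RET_ALLOW for everything else. The tests stack
 * them in different orders to show that the most restrictive action wins,
 * independent of installation order.
 */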
1162 FIXTURE(precedence) {
1163 	struct sock_fprog allow;
1164 	struct sock_fprog log;
1165 	struct sock_fprog trace;
1166 	struct sock_fprog error;
1167 	struct sock_fprog trap;
1168 	struct sock_fprog kill;
1169 };
1170 
1171 FIXTURE_SETUP(precedence)
1172 {
1173 	struct sock_filter allow_insns[] = {
1174 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1175 	};
1176 	struct sock_filter log_insns[] = {
1177 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1178 			offsetof(struct seccomp_data, nr)),
1179 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1180 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1181 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
1182 	};
1183 	struct sock_filter trace_insns[] = {
1184 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1185 			offsetof(struct seccomp_data, nr)),
1186 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1187 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1188 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
1189 	};
1190 	struct sock_filter error_insns[] = {
1191 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1192 			offsetof(struct seccomp_data, nr)),
1193 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1194 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1195 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
1196 	};
1197 	struct sock_filter trap_insns[] = {
1198 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1199 			offsetof(struct seccomp_data, nr)),
1200 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1201 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1202 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
1203 	};
1204 	struct sock_filter kill_insns[] = {
1205 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1206 			offsetof(struct seccomp_data, nr)),
1207 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1208 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1209 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1210 	};
1211 
1212 	memset(self, 0, sizeof(*self));
1213 #define FILTER_ALLOC(_x) \
1214 	self->_x.filter = malloc(sizeof(_x##_insns)); \
1215 	ASSERT_NE(NULL, self->_x.filter); \
1216 	memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
1217 	self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
1218 	FILTER_ALLOC(allow);
1219 	FILTER_ALLOC(log);
1220 	FILTER_ALLOC(trace);
1221 	FILTER_ALLOC(error);
1222 	FILTER_ALLOC(trap);
1223 	FILTER_ALLOC(kill);
1224 }
1225 
1226 FIXTURE_TEARDOWN(precedence)
1227 {
1228 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
1229 	FILTER_FREE(allow);
1230 	FILTER_FREE(log);
1231 	FILTER_FREE(trace);
1232 	FILTER_FREE(error);
1233 	FILTER_FREE(trap);
1234 	FILTER_FREE(kill);
1235 }
1236 
1237 TEST_F(precedence, allow_ok)
1238 {
1239 	pid_t parent, res = 0;
1240 	long ret;
1241 
1242 	parent = getppid();
1243 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1244 	ASSERT_EQ(0, ret);
1245 
1246 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1247 	ASSERT_EQ(0, ret);
1248 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1249 	ASSERT_EQ(0, ret);
1250 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1251 	ASSERT_EQ(0, ret);
1252 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1253 	ASSERT_EQ(0, ret);
1254 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1255 	ASSERT_EQ(0, ret);
1256 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1257 	ASSERT_EQ(0, ret);
1258 	/* Should work just fine. */
1259 	res = syscall(__NR_getppid);
1260 	EXPECT_EQ(parent, res);
1261 }
1262 
1263 TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
1264 {
1265 	pid_t parent, res = 0;
1266 	long ret;
1267 
1268 	parent = getppid();
1269 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1270 	ASSERT_EQ(0, ret);
1271 
1272 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1273 	ASSERT_EQ(0, ret);
1274 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1275 	ASSERT_EQ(0, ret);
1276 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1277 	ASSERT_EQ(0, ret);
1278 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1279 	ASSERT_EQ(0, ret);
1280 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1281 	ASSERT_EQ(0, ret);
1282 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1283 	ASSERT_EQ(0, ret);
1284 	/* Should work just fine. */
1285 	res = syscall(__NR_getppid);
1286 	EXPECT_EQ(parent, res);
1287 	/* getpid() should never return. */
1288 	res = syscall(__NR_getpid);
1289 	EXPECT_EQ(0, res);
1290 }
1291 
1292 TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
1293 {
1294 	pid_t parent;
1295 	long ret;
1296 
1297 	parent = getppid();
1298 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1299 	ASSERT_EQ(0, ret);
1300 
1301 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1302 	ASSERT_EQ(0, ret);
1303 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1304 	ASSERT_EQ(0, ret);
1305 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1306 	ASSERT_EQ(0, ret);
1307 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1308 	ASSERT_EQ(0, ret);
1309 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1310 	ASSERT_EQ(0, ret);
1311 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1312 	ASSERT_EQ(0, ret);
1313 	/* Should work just fine. */
1314 	EXPECT_EQ(parent, syscall(__NR_getppid));
1315 	/* getpid() should never return. */
1316 	EXPECT_EQ(0, syscall(__NR_getpid));
1317 }
1318 
1319 TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
1320 {
1321 	pid_t parent;
1322 	long ret;
1323 
1324 	parent = getppid();
1325 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1326 	ASSERT_EQ(0, ret);
1327 
1328 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1329 	ASSERT_EQ(0, ret);
1330 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1331 	ASSERT_EQ(0, ret);
1332 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1333 	ASSERT_EQ(0, ret);
1334 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1335 	ASSERT_EQ(0, ret);
1336 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1337 	ASSERT_EQ(0, ret);
1338 	/* Should work just fine. */
1339 	EXPECT_EQ(parent, syscall(__NR_getppid));
1340 	/* getpid() should never return. */
1341 	EXPECT_EQ(0, syscall(__NR_getpid));
1342 }
1343 
1344 TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
1345 {
1346 	pid_t parent;
1347 	long ret;
1348 
1349 	parent = getppid();
1350 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1351 	ASSERT_EQ(0, ret);
1352 
1353 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1354 	ASSERT_EQ(0, ret);
1355 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1356 	ASSERT_EQ(0, ret);
1357 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1358 	ASSERT_EQ(0, ret);
1359 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1360 	ASSERT_EQ(0, ret);
1361 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1362 	ASSERT_EQ(0, ret);
1363 	/* Should work just fine. */
1364 	EXPECT_EQ(parent, syscall(__NR_getppid));
1365 	/* getpid() should never return. */
1366 	EXPECT_EQ(0, syscall(__NR_getpid));
1367 }
1368 
1369 TEST_F(precedence, errno_is_third)
1370 {
1371 	pid_t parent;
1372 	long ret;
1373 
1374 	parent = getppid();
1375 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1376 	ASSERT_EQ(0, ret);
1377 
1378 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1379 	ASSERT_EQ(0, ret);
1380 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1381 	ASSERT_EQ(0, ret);
1382 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1383 	ASSERT_EQ(0, ret);
1384 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1385 	ASSERT_EQ(0, ret);
1386 	/* Should work just fine. */
1387 	EXPECT_EQ(parent, syscall(__NR_getppid));
1388 	EXPECT_EQ(0, syscall(__NR_getpid));
1389 }
1390 
1391 TEST_F(precedence, errno_is_third_in_any_order)
1392 {
1393 	pid_t parent;
1394 	long ret;
1395 
1396 	parent = getppid();
1397 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1398 	ASSERT_EQ(0, ret);
1399 
1400 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1401 	ASSERT_EQ(0, ret);
1402 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1403 	ASSERT_EQ(0, ret);
1404 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1405 	ASSERT_EQ(0, ret);
1406 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1407 	ASSERT_EQ(0, ret);
1408 	/* Should work just fine. */
1409 	EXPECT_EQ(parent, syscall(__NR_getppid));
1410 	EXPECT_EQ(0, syscall(__NR_getpid));
1411 }
1412 
1413 TEST_F(precedence, trace_is_fourth)
1414 {
1415 	pid_t parent;
1416 	long ret;
1417 
1418 	parent = getppid();
1419 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1420 	ASSERT_EQ(0, ret);
1421 
1422 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1423 	ASSERT_EQ(0, ret);
1424 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1425 	ASSERT_EQ(0, ret);
1426 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1427 	ASSERT_EQ(0, ret);
1428 	/* Should work just fine. */
1429 	EXPECT_EQ(parent, syscall(__NR_getppid));
1430 	/* No ptracer */
1431 	EXPECT_EQ(-1, syscall(__NR_getpid));
1432 }
1433 
1434 TEST_F(precedence, trace_is_fourth_in_any_order)
1435 {
1436 	pid_t parent;
1437 	long ret;
1438 
1439 	parent = getppid();
1440 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1441 	ASSERT_EQ(0, ret);
1442 
1443 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1444 	ASSERT_EQ(0, ret);
1445 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1446 	ASSERT_EQ(0, ret);
1447 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1448 	ASSERT_EQ(0, ret);
1449 	/* Should work just fine. */
1450 	EXPECT_EQ(parent, syscall(__NR_getppid));
1451 	/* No ptracer */
1452 	EXPECT_EQ(-1, syscall(__NR_getpid));
1453 }
1454 
1455 TEST_F(precedence, log_is_fifth)
1456 {
1457 	pid_t mypid, parent;
1458 	long ret;
1459 
1460 	mypid = getpid();
1461 	parent = getppid();
1462 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1463 	ASSERT_EQ(0, ret);
1464 
1465 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1466 	ASSERT_EQ(0, ret);
1467 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1468 	ASSERT_EQ(0, ret);
1469 	/* Should work just fine. */
1470 	EXPECT_EQ(parent, syscall(__NR_getppid));
1471 	/* Should also work just fine */
1472 	EXPECT_EQ(mypid, syscall(__NR_getpid));
1473 }
1474 
1475 TEST_F(precedence, log_is_fifth_in_any_order)
1476 {
1477 	pid_t mypid, parent;
1478 	long ret;
1479 
1480 	mypid = getpid();
1481 	parent = getppid();
1482 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1483 	ASSERT_EQ(0, ret);
1484 
1485 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1486 	ASSERT_EQ(0, ret);
1487 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1488 	ASSERT_EQ(0, ret);
1489 	/* Should work just fine. */
1490 	EXPECT_EQ(parent, syscall(__NR_getppid));
1491 	/* Should also work just fine */
1492 	EXPECT_EQ(mypid, syscall(__NR_getpid));
1493 }
1494 
1495 #ifndef PTRACE_O_TRACESECCOMP
1496 #define PTRACE_O_TRACESECCOMP	0x00000080
1497 #endif
1498 
1499 /* Catch the Ubuntu 12.04 value error. */
1500 #if PTRACE_EVENT_SECCOMP != 7
1501 #undef PTRACE_EVENT_SECCOMP
1502 #endif
1503 
1504 #ifndef PTRACE_EVENT_SECCOMP
1505 #define PTRACE_EVENT_SECCOMP 7
1506 #endif
1507 
1508 #define PTRACE_EVENT_MASK(status) ((status) >> 16)
1509 bool tracer_running;
1510 void tracer_stop(int sig)
1511 {
1512 	tracer_running = false;
1513 }
1514 
1515 typedef void tracer_func_t(struct __test_metadata *_metadata,
1516 			   pid_t tracee, int status, void *args);
1517 
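/*
 * start_tracer() runs in the forked child: it PTRACE_ATTACHes to the test
 * process, enables either syscall tracing or seccomp event tracing, signals
 * readiness over the pipe fd, and then calls tracer_func() for every ptrace
 * stop until it is told to shut down with SIGUSR1.
 */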
1518 void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
1519 	    tracer_func_t tracer_func, void *args, bool ptrace_syscall)
1520 {
1521 	int ret = -1;
1522 	struct sigaction action = {
1523 		.sa_handler = tracer_stop,
1524 	};
1525 
1526 	/* Allow external shutdown. */
1527 	tracer_running = true;
1528 	ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));
1529 
1530 	errno = 0;
1531 	while (ret == -1 && errno != EINVAL)
1532 		ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
1533 	ASSERT_EQ(0, ret) {
1534 		kill(tracee, SIGKILL);
1535 	}
1536 	/* Wait for attach stop */
1537 	wait(NULL);
1538 
1539 	ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ?
1540 						      PTRACE_O_TRACESYSGOOD :
1541 						      PTRACE_O_TRACESECCOMP);
1542 	ASSERT_EQ(0, ret) {
1543 		TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
1544 		kill(tracee, SIGKILL);
1545 	}
1546 	ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
1547 		     tracee, NULL, 0);
1548 	ASSERT_EQ(0, ret);
1549 
1550 	/* Unblock the tracee */
1551 	ASSERT_EQ(1, write(fd, "A", 1));
1552 	ASSERT_EQ(0, close(fd));
1553 
1554 	/* Run until we're shut down. Must assert to stop execution. */
1555 	while (tracer_running) {
1556 		int status;
1557 
1558 		if (wait(&status) != tracee)
1559 			continue;
1560 
1561 		if (WIFSIGNALED(status)) {
1562 			/* Child caught a fatal signal. */
1563 			return;
1564 		}
1565 		if (WIFEXITED(status)) {
1566 			/* Child exited with code. */
1567 			return;
1568 		}
1569 
1570 		/* Check if we got an expected event. */
1571 		ASSERT_EQ(WIFCONTINUED(status), false);
1572 		ASSERT_EQ(WIFSTOPPED(status), true);
1573 		ASSERT_EQ(WSTOPSIG(status) & SIGTRAP, SIGTRAP) {
1574 			TH_LOG("Unexpected WSTOPSIG: %d", WSTOPSIG(status));
1575 		}
1576 
1577 		tracer_func(_metadata, tracee, status, args);
1578 
1579 		ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
1580 			     tracee, NULL, 0);
1581 		ASSERT_EQ(0, ret);
1582 	}
1583 	/* Directly report the status of our test harness results. */
1584 	syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
1585 }
1586 
1587 /* Common tracer setup/teardown functions. */
1588 void cont_handler(int num)
1589 { }
1590 pid_t setup_trace_fixture(struct __test_metadata *_metadata,
1591 			  tracer_func_t func, void *args, bool ptrace_syscall)
1592 {
1593 	char sync;
1594 	int pipefd[2];
1595 	pid_t tracer_pid;
1596 	pid_t tracee = getpid();
1597 
1598 	/* Set up a pipe for clean synchronization. */
1599 	ASSERT_EQ(0, pipe(pipefd));
1600 
1601 	/* Fork a child which we'll promote to tracer */
1602 	tracer_pid = fork();
1603 	ASSERT_LE(0, tracer_pid);
1604 	signal(SIGALRM, cont_handler);
1605 	if (tracer_pid == 0) {
1606 		close(pipefd[0]);
1607 		start_tracer(_metadata, pipefd[1], tracee, func, args,
1608 			     ptrace_syscall);
1609 		syscall(__NR_exit, 0);
1610 	}
1611 	close(pipefd[1]);
1612 	prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
1613 	read(pipefd[0], &sync, 1);
1614 	close(pipefd[0]);
1615 
1616 	return tracer_pid;
1617 }
1618 
1619 void teardown_trace_fixture(struct __test_metadata *_metadata,
1620 			    pid_t tracer)
1621 {
1622 	if (tracer) {
1623 		int status;
1624 		/*
1625 		 * Extract the exit code from the other process and
1626 		 * adopt it for ourselves in case its asserts failed.
1627 		 */
1628 		ASSERT_EQ(0, kill(tracer, SIGUSR1));
1629 		ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
1630 		if (WEXITSTATUS(status))
1631 			_metadata->passed = 0;
1632 	}
1633 }
1634 
1635 /* "poke" tracer arguments and function. */
1636 struct tracer_args_poke_t {
1637 	unsigned long poke_addr;
1638 };
1639 
1640 void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
1641 		 void *args)
1642 {
1643 	int ret;
1644 	unsigned long msg;
1645 	struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
1646 
1647 	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1648 	EXPECT_EQ(0, ret);
1649 	/* If this fails, don't try to recover. */
1650 	ASSERT_EQ(0x1001, msg) {
1651 		kill(tracee, SIGKILL);
1652 	}
1653 	/*
1654 	 * Poke in the message.
1655 	 * Registers are not touched to try to keep this relatively arch
1656 	 * agnostic.
1657 	 */
1658 	ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
1659 	EXPECT_EQ(0, ret);
1660 }
1661 
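/*
 * TRACE_poke fixture: the filter returns SECCOMP_RET_TRACE | 0x1001 for
 * read(2). The tracer checks the 0x1001 event message and pokes that value
 * into self->poked, demonstrating a tracer-visible side effect when read()
 * is intercepted, while getpid() (not matched) runs untouched.
 */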
1662 FIXTURE(TRACE_poke) {
1663 	struct sock_fprog prog;
1664 	pid_t tracer;
1665 	long poked;
1666 	struct tracer_args_poke_t tracer_args;
1667 };
1668 
1669 FIXTURE_SETUP(TRACE_poke)
1670 {
1671 	struct sock_filter filter[] = {
1672 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1673 			offsetof(struct seccomp_data, nr)),
1674 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1675 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
1676 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1677 	};
1678 
1679 	self->poked = 0;
1680 	memset(&self->prog, 0, sizeof(self->prog));
1681 	self->prog.filter = malloc(sizeof(filter));
1682 	ASSERT_NE(NULL, self->prog.filter);
1683 	memcpy(self->prog.filter, filter, sizeof(filter));
1684 	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1685 
1686 	/* Set up tracer args. */
1687 	self->tracer_args.poke_addr = (unsigned long)&self->poked;
1688 
1689 	/* Launch tracer. */
1690 	self->tracer = setup_trace_fixture(_metadata, tracer_poke,
1691 					   &self->tracer_args, false);
1692 }
1693 
1694 FIXTURE_TEARDOWN(TRACE_poke)
1695 {
1696 	teardown_trace_fixture(_metadata, self->tracer);
1697 	if (self->prog.filter)
1698 		free(self->prog.filter);
1699 }
1700 
1701 TEST_F(TRACE_poke, read_has_side_effects)
1702 {
1703 	ssize_t ret;
1704 
1705 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1706 	ASSERT_EQ(0, ret);
1707 
1708 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1709 	ASSERT_EQ(0, ret);
1710 
1711 	EXPECT_EQ(0, self->poked);
1712 	ret = read(-1, NULL, 0);
1713 	EXPECT_EQ(-1, ret);
1714 	EXPECT_EQ(0x1001, self->poked);
1715 }
1716 
1717 TEST_F(TRACE_poke, getpid_runs_normally)
1718 {
1719 	long ret;
1720 
1721 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1722 	ASSERT_EQ(0, ret);
1723 
1724 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1725 	ASSERT_EQ(0, ret);
1726 
1727 	EXPECT_EQ(0, self->poked);
1728 	EXPECT_NE(0, syscall(__NR_getpid));
1729 	EXPECT_EQ(0, self->poked);
1730 }
1731 
1732 #if defined(__x86_64__)
1733 # define ARCH_REGS		struct user_regs_struct
1734 # define SYSCALL_NUM(_regs)	(_regs).orig_rax
1735 # define SYSCALL_RET(_regs)	(_regs).rax
1736 #elif defined(__i386__)
1737 # define ARCH_REGS		struct user_regs_struct
1738 # define SYSCALL_NUM(_regs)	(_regs).orig_eax
1739 # define SYSCALL_RET(_regs)	(_regs).eax
1740 #elif defined(__arm__)
1741 # define ARCH_REGS		struct pt_regs
1742 # define SYSCALL_NUM(_regs)	(_regs).ARM_r7
1743 # ifndef PTRACE_SET_SYSCALL
1744 #  define PTRACE_SET_SYSCALL   23
1745 # endif
1746 # define SYSCALL_NUM_SET(_regs, _nr)	\
1747 		EXPECT_EQ(0, ptrace(PTRACE_SET_SYSCALL, tracee, NULL, _nr))
1748 # define SYSCALL_RET(_regs)	(_regs).ARM_r0
1749 #elif defined(__aarch64__)
1750 # define ARCH_REGS		struct user_pt_regs
1751 # define SYSCALL_NUM(_regs)	(_regs).regs[8]
1752 # ifndef NT_ARM_SYSTEM_CALL
1753 #  define NT_ARM_SYSTEM_CALL 0x404
1754 # endif
1755 # define SYSCALL_NUM_SET(_regs, _nr)				\
1756 	do {							\
1757 		struct iovec __v;				\
1758 		typeof(_nr) __nr = (_nr);			\
1759 		__v.iov_base = &__nr;				\
1760 		__v.iov_len = sizeof(__nr);			\
1761 		EXPECT_EQ(0, ptrace(PTRACE_SETREGSET, tracee,	\
1762 				    NT_ARM_SYSTEM_CALL, &__v));	\
1763 	} while (0)
1764 # define SYSCALL_RET(_regs)	(_regs).regs[0]
1765 #elif defined(__riscv) && __riscv_xlen == 64
1766 # define ARCH_REGS		struct user_regs_struct
1767 # define SYSCALL_NUM(_regs)	(_regs).a7
1768 # define SYSCALL_RET(_regs)	(_regs).a0
1769 #elif defined(__csky__)
1770 # define ARCH_REGS		struct pt_regs
1771 #  if defined(__CSKYABIV2__)
1772 #   define SYSCALL_NUM(_regs)	(_regs).regs[3]
1773 #  else
1774 #   define SYSCALL_NUM(_regs)	(_regs).regs[9]
1775 #  endif
1776 # define SYSCALL_RET(_regs)	(_regs).a0
1777 #elif defined(__hppa__)
1778 # define ARCH_REGS		struct user_regs_struct
1779 # define SYSCALL_NUM(_regs)	(_regs).gr[20]
1780 # define SYSCALL_RET(_regs)	(_regs).gr[28]
1781 #elif defined(__powerpc__)
1782 # define ARCH_REGS		struct pt_regs
1783 # define SYSCALL_NUM(_regs)	(_regs).gpr[0]
1784 # define SYSCALL_RET(_regs)	(_regs).gpr[3]
1785 # define SYSCALL_RET_SET(_regs, _val)				\
1786 	do {							\
1787 		typeof(_val) _result = (_val);			\
1788 		if ((_regs.trap & 0xfff0) == 0x3000) {		\
1789 			/*					\
1790 			 * scv 0 system call uses -ve result	\
1791 			 * for error, so no need to adjust.	\
1792 			 */					\
1793 			SYSCALL_RET(_regs) = _result;		\
1794 		} else {					\
1795 			/*					\
1796 			 * A syscall error is signaled by the	\
1797 			 * CR0 SO bit and the code is stored as	\
1798 			 * a positive value.			\
1799 			 */					\
1800 			if (_result < 0) {			\
1801 				SYSCALL_RET(_regs) = -_result;	\
1802 				(_regs).ccr |= 0x10000000;	\
1803 			} else {				\
1804 				SYSCALL_RET(_regs) = _result;	\
1805 				(_regs).ccr &= ~0x10000000;	\
1806 			}					\
1807 		}						\
1808 	} while (0)
1809 # define SYSCALL_RET_SET_ON_PTRACE_EXIT
1810 #elif defined(__s390__)
1811 # define ARCH_REGS		s390_regs
1812 # define SYSCALL_NUM(_regs)	(_regs).gprs[2]
1813 # define SYSCALL_RET_SET(_regs, _val)			\
1814 		TH_LOG("Can't modify syscall return on this architecture")
1815 #elif defined(__mips__)
1816 # include <asm/unistd_nr_n32.h>
1817 # include <asm/unistd_nr_n64.h>
1818 # include <asm/unistd_nr_o32.h>
1819 # define ARCH_REGS		struct pt_regs
1820 # define SYSCALL_NUM(_regs)				\
1821 	({						\
1822 		typeof((_regs).regs[2]) _nr;		\
1823 		if ((_regs).regs[2] == __NR_O32_Linux)	\
1824 			_nr = (_regs).regs[4];		\
1825 		else					\
1826 			_nr = (_regs).regs[2];		\
1827 		_nr;					\
1828 	})
1829 # define SYSCALL_NUM_SET(_regs, _nr)			\
1830 	do {						\
1831 		if ((_regs).regs[2] == __NR_O32_Linux)	\
1832 			(_regs).regs[4] = _nr;		\
1833 		else					\
1834 			(_regs).regs[2] = _nr;		\
1835 	} while (0)
1836 # define SYSCALL_RET_SET(_regs, _val)			\
1837 		TH_LOG("Can't modify syscall return on this architecture")
1838 #elif defined(__xtensa__)
1839 # define ARCH_REGS		struct user_pt_regs
1840 # define SYSCALL_NUM(_regs)	(_regs).syscall
1841 /*
1842  * On xtensa, the syscall return value is in register a2 of the
1843  * current register window, which is not at a fixed offset.
1844  */
1845 # define SYSCALL_RET(_regs)	(_regs).a[(_regs).windowbase * 4 + 2]
1846 #elif defined(__sh__)
1847 # define ARCH_REGS		struct pt_regs
1848 # define SYSCALL_NUM(_regs)	(_regs).regs[3]
1849 # define SYSCALL_RET(_regs)	(_regs).regs[0]
1850 #else
1851 # error "Do not know how to find your architecture's registers and syscalls"
1852 #endif
1853 
1854 /*
1855  * Most architectures can change the syscall by just updating the
1856  * associated register. This is the default if not defined above.
1857  */
1858 #ifndef SYSCALL_NUM_SET
1859 # define SYSCALL_NUM_SET(_regs, _nr)		\
1860 	do {					\
1861 		SYSCALL_NUM(_regs) = (_nr);	\
1862 	} while (0)
1863 #endif
1864 /*
1865  * Most architectures can change the syscall return value by just
1866  * writing to the SYSCALL_RET register. This is the default if not
1867  * defined above. If an architecture cannot set the return value
1868  * (for example when the syscall and return value register is
1869  * shared), report it with TH_LOG() in an arch-specific definition
1870  * of SYSCALL_RET_SET() above, and leave SYSCALL_RET undefined.
1871  */
1872 #if !defined(SYSCALL_RET) && !defined(SYSCALL_RET_SET)
1873 # error "One of SYSCALL_RET or SYSCALL_RET_SET is needed for this arch"
1874 #endif
1875 #ifndef SYSCALL_RET_SET
1876 # define SYSCALL_RET_SET(_regs, _val)		\
1877 	do {					\
1878 		SYSCALL_RET(_regs) = (_val);	\
1879 	} while (0)
1880 #endif
1881 
1882 /* When the syscall return can't be changed, stub out the tests for it. */
1883 #ifndef SYSCALL_RET
1884 # define EXPECT_SYSCALL_RETURN(val, action)	EXPECT_EQ(-1, action)
1885 #else
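/*
 * A negative "val" is treated as an expected errno: the call must fail
 * with -1 and errno == -val. A non-negative "val" is the expected
 * return value of the call itself.
 */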
1886 # define EXPECT_SYSCALL_RETURN(val, action)		\
1887 	do {						\
1888 		errno = 0;				\
1889 		if (val < 0) {				\
1890 			EXPECT_EQ(-1, action);		\
1891 			EXPECT_EQ(-(val), errno);	\
1892 		} else {				\
1893 			EXPECT_EQ(val, action);		\
1894 		}					\
1895 	} while (0)
1896 #endif
1897 
1898 /*
1899  * Some architectures (e.g. powerpc) can only set syscall
1900  * return values on syscall exit during ptrace.
1901  */
1902 const bool ptrace_entry_set_syscall_nr = true;
1903 const bool ptrace_entry_set_syscall_ret =
1904 #ifndef SYSCALL_RET_SET_ON_PTRACE_EXIT
1905 	true;
1906 #else
1907 	false;
1908 #endif
1909 
1910 /*
1911  * Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
1912  * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
1913  */
1914 #if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
1915 # define ARCH_GETREGS(_regs)	ptrace(PTRACE_GETREGS, tracee, 0, &(_regs))
1916 # define ARCH_SETREGS(_regs)	ptrace(PTRACE_SETREGS, tracee, 0, &(_regs))
1917 #else
1918 # define ARCH_GETREGS(_regs)	({					\
1919 		struct iovec __v;					\
1920 		__v.iov_base = &(_regs);				\
1921 		__v.iov_len = sizeof(_regs);				\
1922 		ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &__v);	\
1923 	})
1924 # define ARCH_SETREGS(_regs)	({					\
1925 		struct iovec __v;					\
1926 		__v.iov_base = &(_regs);				\
1927 		__v.iov_len = sizeof(_regs);				\
1928 		ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &__v);	\
1929 	})
1930 #endif
1931 
1932 /* Architecture-specific syscall fetching routine. */
1933 int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
1934 {
1935 	ARCH_REGS regs;
1936 
1937 	EXPECT_EQ(0, ARCH_GETREGS(regs)) {
1938 		return -1;
1939 	}
1940 
1941 	return SYSCALL_NUM(regs);
1942 }
1943 
1944 /* Architecture-specific syscall changing routine. */
1945 void __change_syscall(struct __test_metadata *_metadata,
1946 		    pid_t tracee, long *syscall, long *ret)
1947 {
1948 	ARCH_REGS orig, regs;
1949 
1950 	/* Do not get/set registers if we have nothing to do. */
1951 	if (!syscall && !ret)
1952 		return;
1953 
1954 	EXPECT_EQ(0, ARCH_GETREGS(regs)) {
1955 		return;
1956 	}
1957 	orig = regs;
1958 
1959 	if (syscall)
1960 		SYSCALL_NUM_SET(regs, *syscall);
1961 
1962 	if (ret)
1963 		SYSCALL_RET_SET(regs, *ret);
1964 
1965 	/* Flush any register changes made. */
1966 	if (memcmp(&orig, &regs, sizeof(orig)) != 0)
1967 		EXPECT_EQ(0, ARCH_SETREGS(regs));
1968 }
1969 
1970 /* Change only syscall number. */
1971 void change_syscall_nr(struct __test_metadata *_metadata,
1972 		       pid_t tracee, long syscall)
1973 {
1974 	__change_syscall(_metadata, tracee, &syscall, NULL);
1975 }
1976 
1977 /* Change syscall return value (and set syscall number to -1). */
1978 void change_syscall_ret(struct __test_metadata *_metadata,
1979 			pid_t tracee, long ret)
1980 {
1981 	long syscall = -1;
1982 
1983 	__change_syscall(_metadata, tracee, &syscall, &ret);
1984 }
1985 
1986 void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee,
1987 		    int status, void *args)
1988 {
1989 	int ret;
1990 	unsigned long msg;
1991 
1992 	EXPECT_EQ(PTRACE_EVENT_MASK(status), PTRACE_EVENT_SECCOMP) {
1993 		TH_LOG("Unexpected ptrace event: %d", PTRACE_EVENT_MASK(status));
1994 		return;
1995 	}
1996 
1997 	/* Make sure we got the right message. */
1998 	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1999 	EXPECT_EQ(0, ret);
2000 
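	/*
	 * "msg" is the SECCOMP_RET_DATA portion of the SECCOMP_RET_TRACE
	 * value returned by the filter installed in FIXTURE_SETUP(TRACE_syscall).
	 */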
2001 	/* Validate and take action on expected syscalls. */
2002 	switch (msg) {
2003 	case 0x1002:
2004 		/* change getpid to getppid. */
2005 		EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
2006 		change_syscall_nr(_metadata, tracee, __NR_getppid);
2007 		break;
2008 	case 0x1003:
2009 		/* skip gettid with valid return code. */
2010 		EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
2011 		change_syscall_ret(_metadata, tracee, 45000);
2012 		break;
2013 	case 0x1004:
2014 		/* skip openat with error. */
2015 		EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee));
2016 		change_syscall_ret(_metadata, tracee, -ESRCH);
2017 		break;
2018 	case 0x1005:
2019 		/* do nothing (allow getppid) */
2020 		EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
2021 		break;
2022 	default:
2023 		EXPECT_EQ(0, msg) {
2024 			TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
2025 			kill(tracee, SIGKILL);
2026 		}
2027 	}
2028 
2029 }
2030 
2031 FIXTURE(TRACE_syscall) {
2032 	struct sock_fprog prog;
2033 	pid_t tracer, mytid, mypid, parent;
2034 	long syscall_nr;
2035 };
2036 
2037 void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
2038 		   int status, void *args)
2039 {
2040 	int ret;
2041 	unsigned long msg;
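	/* Static: the entry/exit toggle below must survive across calls. */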
2042 	static bool entry;
2043 	long syscall_nr_val, syscall_ret_val;
2044 	long *syscall_nr = NULL, *syscall_ret = NULL;
2045 	FIXTURE_DATA(TRACE_syscall) *self = args;
2046 
2047 	EXPECT_EQ(WSTOPSIG(status) & 0x80, 0x80) {
2048 		TH_LOG("Unexpected WSTOPSIG: %d", WSTOPSIG(status));
2049 		return;
2050 	}
2051 
2052 	/*
2053 	 * The traditional way to tell PTRACE_SYSCALL entry/exit
2054 	 * is by counting.
2055 	 */
2056 	entry = !entry;
2057 
2058 	/* Make sure we got an appropriate message. */
2059 	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
2060 	EXPECT_EQ(0, ret);
2061 
2062 	/*
2063 	 * TODO: b/33027081
2064 	 * PTRACE_EVENTMSG_SYSCALL_ENTRY and PTRACE_EVENTMSG_SYSCALL_EXIT not
2065 	 * compatible < 5.3 (see 201766a)
2066 	 *
2067 	 * EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY
2068 	 *		: PTRACE_EVENTMSG_SYSCALL_EXIT, msg);
2069 	 */
2070 
2071 	/*
2072 	 * Some architectures only support setting return values during
2073 	 * syscall exit under ptrace, and on exit the syscall number may
2074 	 * no longer be available. Therefore, save the initial syscall
2075 	 * number here, so it can be examined during both entry and exit
2076 	 * phases.
2077 	 */
2078 	if (entry)
2079 		self->syscall_nr = get_syscall(_metadata, tracee);
2080 
2081 	/*
2082 	 * Depending on the architecture's syscall setting abilities, we
2083 	 * pick which things to set during this phase (entry or exit).
2084 	 */
2085 	if (entry == ptrace_entry_set_syscall_nr)
2086 		syscall_nr = &syscall_nr_val;
2087 	if (entry == ptrace_entry_set_syscall_ret)
2088 		syscall_ret = &syscall_ret_val;
2089 
2090 	/* Now handle the actual rewriting cases. */
2091 	switch (self->syscall_nr) {
2092 	case __NR_getpid:
2093 		syscall_nr_val = __NR_getppid;
2094 		/* Never change syscall return for this case. */
2095 		syscall_ret = NULL;
2096 		break;
2097 	case __NR_gettid:
2098 		syscall_nr_val = -1;
2099 		syscall_ret_val = 45000;
2100 		break;
2101 	case __NR_openat:
2102 		syscall_nr_val = -1;
2103 		syscall_ret_val = -ESRCH;
2104 		break;
2105 	default:
2106 		/* Unhandled, do nothing. */
2107 		return;
2108 	}
2109 
2110 	__change_syscall(_metadata, tracee, syscall_nr, syscall_ret);
2111 }
2112 
2113 FIXTURE_VARIANT(TRACE_syscall) {
2114 	/*
2115 	 * All of the SECCOMP_RET_TRACE behaviors can be tested with either
2116 	 * SECCOMP_RET_TRACE+PTRACE_CONT or plain ptrace()+PTRACE_SYSCALL.
2117 	 * This indicates if we should use SECCOMP_RET_TRACE (false), or
2118 	 * ptrace (true).
2119 	 */
2120 	bool use_ptrace;
2121 };
2122 
2123 FIXTURE_VARIANT_ADD(TRACE_syscall, ptrace) {
2124 	.use_ptrace = true,
2125 };
2126 
2127 FIXTURE_VARIANT_ADD(TRACE_syscall, seccomp) {
2128 	.use_ptrace = false,
2129 };
2130 
2131 FIXTURE_SETUP(TRACE_syscall)
2132 {
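	/*
	 * Trace getpid, gettid, openat, and getppid with distinct event
	 * messages (0x1002-0x1005) so the tracer can tell them apart;
	 * everything else is allowed.
	 */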
2133 	struct sock_filter filter[] = {
2134 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2135 			offsetof(struct seccomp_data, nr)),
2136 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
2137 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
2138 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
2139 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
2140 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1),
2141 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
2142 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
2143 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005),
2144 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2145 	};
2146 	struct sock_fprog prog = {
2147 		.len = (unsigned short)ARRAY_SIZE(filter),
2148 		.filter = filter,
2149 	};
2150 	long ret;
2151 
2152 	/* Prepare some testable syscall results. */
2153 	self->mytid = syscall(__NR_gettid);
2154 	ASSERT_GT(self->mytid, 0);
2155 	ASSERT_NE(self->mytid, 1) {
2156 		TH_LOG("Running this test as init is not supported. :)");
2157 	}
2158 
2159 	self->mypid = getpid();
2160 	ASSERT_GT(self->mypid, 0);
2161 	ASSERT_EQ(self->mytid, self->mypid);
2162 
2163 	self->parent = getppid();
2164 	ASSERT_GT(self->parent, 0);
2165 	ASSERT_NE(self->parent, self->mypid);
2166 
2167 	/* Launch tracer. */
2168 	self->tracer = setup_trace_fixture(_metadata,
2169 					   variant->use_ptrace ? tracer_ptrace
2170 							       : tracer_seccomp,
2171 					   self, variant->use_ptrace);
2172 
2173 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2174 	ASSERT_EQ(0, ret);
2175 
2176 	/* Do not install seccomp rewrite filters, as we'll use ptrace instead. */
2177 	if (variant->use_ptrace)
2178 		return;
2179 
2180 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2181 	ASSERT_EQ(0, ret);
2182 }
2183 
2184 FIXTURE_TEARDOWN(TRACE_syscall)
2185 {
2186 	teardown_trace_fixture(_metadata, self->tracer);
2187 }
2188 
2189 TEST(negative_ENOSYS)
2190 {
2191 	/*
2192 	 * There should be no difference between an "internal" skip
2193 	 * and userspace asking for syscall "-1".
2194 	 */
2195 	errno = 0;
2196 	EXPECT_EQ(-1, syscall(-1));
2197 	EXPECT_EQ(errno, ENOSYS);
2198 	/* And no difference for "still not valid but not -1". */
2199 	errno = 0;
2200 	EXPECT_EQ(-1, syscall(-101));
2201 	EXPECT_EQ(errno, ENOSYS);
2202 }
2203 
2204 TEST_F(TRACE_syscall, negative_ENOSYS)
2205 {
2206 	negative_ENOSYS(_metadata);
2207 }
2208 
2209 TEST_F(TRACE_syscall, syscall_allowed)
2210 {
2211 	/* getppid works as expected (no changes). */
2212 	EXPECT_EQ(self->parent, syscall(__NR_getppid));
2213 	EXPECT_NE(self->mypid, syscall(__NR_getppid));
2214 }
2215 
2216 TEST_F(TRACE_syscall, syscall_redirected)
2217 {
2218 	/* getpid has been redirected to getppid as expected. */
2219 	EXPECT_EQ(self->parent, syscall(__NR_getpid));
2220 	EXPECT_NE(self->mypid, syscall(__NR_getpid));
2221 }
2222 
2223 TEST_F(TRACE_syscall, syscall_errno)
2224 {
2225 	/* Tracer should skip the openat syscall, resulting in ESRCH. */
2226 	EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
2227 }
2228 
2229 TEST_F(TRACE_syscall, syscall_faked)
2230 {
2231 	/* Tracer skips the gettid syscall and stores an altered return value. */
2232 	EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
2233 }
2234 
2235 TEST_F_SIGNAL(TRACE_syscall, kill_immediate, SIGSYS)
2236 {
2237 	struct sock_filter filter[] = {
2238 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2239 			offsetof(struct seccomp_data, nr)),
2240 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_mknodat, 0, 1),
2241 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
2242 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2243 	};
2244 	struct sock_fprog prog = {
2245 		.len = (unsigned short)ARRAY_SIZE(filter),
2246 		.filter = filter,
2247 	};
2248 	long ret;
2249 
2250 	/* Install "kill on mknodat" filter. */
2251 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2252 	ASSERT_EQ(0, ret);
2253 
2254 	/* This should immediately die with SIGSYS, regardless of tracer. */
2255 	EXPECT_EQ(-1, syscall(__NR_mknodat, -1, NULL, 0, 0));
2256 }
2257 
2258 TEST_F(TRACE_syscall, skip_after)
2259 {
2260 	struct sock_filter filter[] = {
2261 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2262 			offsetof(struct seccomp_data, nr)),
2263 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
2264 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
2265 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2266 	};
2267 	struct sock_fprog prog = {
2268 		.len = (unsigned short)ARRAY_SIZE(filter),
2269 		.filter = filter,
2270 	};
2271 	long ret;
2272 
2273 	/* Install additional "errno on getppid" filter. */
2274 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2275 	ASSERT_EQ(0, ret);
2276 
2277 	/* Tracer will redirect getpid to getppid, and we should see EPERM. */
2278 	errno = 0;
2279 	EXPECT_EQ(-1, syscall(__NR_getpid));
2280 	EXPECT_EQ(EPERM, errno);
2281 }
2282 
2283 TEST_F_SIGNAL(TRACE_syscall, kill_after, SIGSYS)
2284 {
2285 	struct sock_filter filter[] = {
2286 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2287 			offsetof(struct seccomp_data, nr)),
2288 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
2289 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2290 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2291 	};
2292 	struct sock_fprog prog = {
2293 		.len = (unsigned short)ARRAY_SIZE(filter),
2294 		.filter = filter,
2295 	};
2296 	long ret;
2297 
2298 	/* Install additional "death on getppid" filter. */
2299 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2300 	ASSERT_EQ(0, ret);
2301 
2302 	/* Tracer will redirect getpid to getppid, and we should die. */
2303 	EXPECT_NE(self->mypid, syscall(__NR_getpid));
2304 }
2305 
2306 TEST(seccomp_syscall)
2307 {
2308 	struct sock_filter filter[] = {
2309 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2310 	};
2311 	struct sock_fprog prog = {
2312 		.len = (unsigned short)ARRAY_SIZE(filter),
2313 		.filter = filter,
2314 	};
2315 	long ret;
2316 
2317 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2318 	ASSERT_EQ(0, ret) {
2319 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2320 	}
2321 
2322 	/* Reject insane operation. */
2323 	ret = seccomp(-1, 0, &prog);
2324 	ASSERT_NE(ENOSYS, errno) {
2325 		TH_LOG("Kernel does not support seccomp syscall!");
2326 	}
2327 	EXPECT_EQ(EINVAL, errno) {
2328 		TH_LOG("Did not reject crazy op value!");
2329 	}
2330 
2331 	/* Reject strict with flags or pointer. */
2332 	ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
2333 	EXPECT_EQ(EINVAL, errno) {
2334 		TH_LOG("Did not reject mode strict with flags!");
2335 	}
2336 	ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
2337 	EXPECT_EQ(EINVAL, errno) {
2338 		TH_LOG("Did not reject mode strict with uargs!");
2339 	}
2340 
2341 	/* Reject insane args for filter. */
2342 	ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
2343 	EXPECT_EQ(EINVAL, errno) {
2344 		TH_LOG("Did not reject crazy filter flags!");
2345 	}
2346 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
2347 	EXPECT_EQ(EFAULT, errno) {
2348 		TH_LOG("Did not reject NULL filter!");
2349 	}
2350 
2351 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2352 	EXPECT_EQ(0, errno) {
2353 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
2354 			strerror(errno));
2355 	}
2356 }
2357 
2358 TEST(seccomp_syscall_mode_lock)
2359 {
2360 	struct sock_filter filter[] = {
2361 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2362 	};
2363 	struct sock_fprog prog = {
2364 		.len = (unsigned short)ARRAY_SIZE(filter),
2365 		.filter = filter,
2366 	};
2367 	long ret;
2368 
2369 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2370 	ASSERT_EQ(0, ret) {
2371 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2372 	}
2373 
2374 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2375 	ASSERT_NE(ENOSYS, errno) {
2376 		TH_LOG("Kernel does not support seccomp syscall!");
2377 	}
2378 	EXPECT_EQ(0, ret) {
2379 		TH_LOG("Could not install filter!");
2380 	}
2381 
2382 	/* Make sure neither entry point will switch to strict. */
2383 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
2384 	EXPECT_EQ(EINVAL, errno) {
2385 		TH_LOG("Switched to mode strict!");
2386 	}
2387 
2388 	ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
2389 	EXPECT_EQ(EINVAL, errno) {
2390 		TH_LOG("Switched to mode strict!");
2391 	}
2392 }
2393 
2394 /*
2395  * b/147676645
2396  * SECCOMP_FILTER_FLAG_TSYNC_ESRCH not compatible < 5.7
2397  * SECCOMP_FILTER_FLAG_NEW_LISTENER not compatible < 5.0
2398  * SECCOMP_FILTER_FLAG_SPEC_ALLOW not compatible < 4.17
2399  * SECCOMP_FILTER_FLAG_LOG not compatible < 4.14
2400  */
2401 #ifndef __ANDROID__
2402 /*
2403  * Test detection of known and unknown filter flags. Userspace needs to be able
2404  * to check if a filter flag is supported by the current kernel and a good way
2405  * of doing that is by attempting to enter filter mode, with the flag bit in
2406  * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates
2407  * that the flag is valid and EINVAL indicates that the flag is invalid.
2408  */
2409 TEST(detect_seccomp_filter_flags)
2410 {
2411 	unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
2412 				 SECCOMP_FILTER_FLAG_LOG,
2413 				 SECCOMP_FILTER_FLAG_SPEC_ALLOW,
2414 				 SECCOMP_FILTER_FLAG_NEW_LISTENER,
2415 				 SECCOMP_FILTER_FLAG_TSYNC_ESRCH };
2416 	unsigned int exclusive[] = {
2417 				SECCOMP_FILTER_FLAG_TSYNC,
2418 				SECCOMP_FILTER_FLAG_NEW_LISTENER };
2419 	unsigned int flag, all_flags, exclusive_mask;
2420 	int i;
2421 	long ret;
2422 
2423 	/* Test detection of individual known-good filter flags */
2424 	for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
2425 		int bits = 0;
2426 
2427 		flag = flags[i];
2428 		/* Make sure the flag is a single bit! */
2429 		while (flag) {
2430 			if (flag & 0x1)
2431 				bits++;
2432 			flag >>= 1;
2433 		}
2434 		ASSERT_EQ(1, bits);
2435 		flag = flags[i];
2436 
2437 		ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2438 		ASSERT_NE(ENOSYS, errno) {
2439 			TH_LOG("Kernel does not support seccomp syscall!");
2440 		}
2441 		EXPECT_EQ(-1, ret);
2442 		EXPECT_EQ(EFAULT, errno) {
2443 			TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
2444 			       flag);
2445 		}
2446 
2447 		all_flags |= flag;
2448 	}
2449 
2450 	/*
2451 	 * Test detection of all known-good filter flags combined. But
2452 	 * for the exclusive flags we need to mask them out and try them
2453 	 * individually for the "all flags" testing.
2454 	 */
2455 	exclusive_mask = 0;
2456 	for (i = 0; i < ARRAY_SIZE(exclusive); i++)
2457 		exclusive_mask |= exclusive[i];
2458 	for (i = 0; i < ARRAY_SIZE(exclusive); i++) {
2459 		flag = all_flags & ~exclusive_mask;
2460 		flag |= exclusive[i];
2461 
2462 		ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2463 		EXPECT_EQ(-1, ret);
2464 		EXPECT_EQ(EFAULT, errno) {
2465 			TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
2466 			       flag);
2467 		}
2468 	}
2469 
2470 	/* Test detection of an unknown filter flag, without exclusives. */
2471 	flag = -1;
2472 	flag &= ~exclusive_mask;
2473 	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2474 	EXPECT_EQ(-1, ret);
2475 	EXPECT_EQ(EINVAL, errno) {
2476 		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
2477 		       flag);
2478 	}
2479 
2480 	/*
2481 	 * Test detection of an unknown filter flag that may simply need to be
2482 	 * added to this test
2483 	 */
2484 	flag = flags[ARRAY_SIZE(flags) - 1] << 1;
2485 	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2486 	EXPECT_EQ(-1, ret);
2487 	EXPECT_EQ(EINVAL, errno) {
2488 		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
2489 		       flag);
2490 	}
2491 }
2492 #endif
2493 
2494 TEST(TSYNC_first)
2495 {
2496 	struct sock_filter filter[] = {
2497 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2498 	};
2499 	struct sock_fprog prog = {
2500 		.len = (unsigned short)ARRAY_SIZE(filter),
2501 		.filter = filter,
2502 	};
2503 	long ret;
2504 
2505 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2506 	ASSERT_EQ(0, ret) {
2507 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2508 	}
2509 
2510 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2511 		      &prog);
2512 	ASSERT_NE(ENOSYS, errno) {
2513 		TH_LOG("Kernel does not support seccomp syscall!");
2514 	}
2515 	EXPECT_EQ(0, ret) {
2516 		TH_LOG("Could not install initial filter with TSYNC!");
2517 	}
2518 }
2519 
2520 #define TSYNC_SIBLINGS 2
2521 struct tsync_sibling {
2522 	pthread_t tid;
2523 	pid_t system_tid;
2524 	sem_t *started;
2525 	pthread_cond_t *cond;
2526 	pthread_mutex_t *mutex;
2527 	int diverge;
2528 	int num_waits;
2529 	struct sock_fprog *prog;
2530 	struct __test_metadata *metadata;
2531 };
2532 
2533 /*
2534  * To avoid joining joined threads (which is not allowed by Bionic),
2535  * make sure we both successfully join and clear the tid to skip a
2536  * later join attempt during fixture teardown. Any remaining threads
2537  * will be directly killed during teardown.
2538  */
2539 #define PTHREAD_JOIN(tid, status)					\
2540 	do {								\
2541 		int _rc = pthread_join(tid, status);			\
2542 		if (_rc) {						\
2543 			TH_LOG("pthread_join of tid %u failed: %d\n",	\
2544 				(unsigned int)tid, _rc);		\
2545 		} else {						\
2546 			tid = 0;					\
2547 		}							\
2548 	} while (0)
2549 
2550 FIXTURE(TSYNC) {
2551 	struct sock_fprog root_prog, apply_prog;
2552 	struct tsync_sibling sibling[TSYNC_SIBLINGS];
2553 	sem_t started;
2554 	pthread_cond_t cond;
2555 	pthread_mutex_t mutex;
2556 	int sibling_count;
2557 };
2558 
2559 FIXTURE_SETUP(TSYNC)
2560 {
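	/*
	 * root_prog allows everything; apply_prog kills on read(). A
	 * diverging sibling re-applies root_prog to itself, and the tests
	 * then try to push apply_prog onto all threads via TSYNC.
	 */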
2561 	struct sock_filter root_filter[] = {
2562 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2563 	};
2564 	struct sock_filter apply_filter[] = {
2565 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2566 			offsetof(struct seccomp_data, nr)),
2567 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
2568 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2569 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2570 	};
2571 
2572 	memset(&self->root_prog, 0, sizeof(self->root_prog));
2573 	memset(&self->apply_prog, 0, sizeof(self->apply_prog));
2574 	memset(&self->sibling, 0, sizeof(self->sibling));
2575 	self->root_prog.filter = malloc(sizeof(root_filter));
2576 	ASSERT_NE(NULL, self->root_prog.filter);
2577 	memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
2578 	self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
2579 
2580 	self->apply_prog.filter = malloc(sizeof(apply_filter));
2581 	ASSERT_NE(NULL, self->apply_prog.filter);
2582 	memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
2583 	self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
2584 
2585 	self->sibling_count = 0;
2586 	pthread_mutex_init(&self->mutex, NULL);
2587 	pthread_cond_init(&self->cond, NULL);
2588 	sem_init(&self->started, 0, 0);
2589 	self->sibling[0].tid = 0;
2590 	self->sibling[0].cond = &self->cond;
2591 	self->sibling[0].started = &self->started;
2592 	self->sibling[0].mutex = &self->mutex;
2593 	self->sibling[0].diverge = 0;
2594 	self->sibling[0].num_waits = 1;
2595 	self->sibling[0].prog = &self->root_prog;
2596 	self->sibling[0].metadata = _metadata;
2597 	self->sibling[1].tid = 0;
2598 	self->sibling[1].cond = &self->cond;
2599 	self->sibling[1].started = &self->started;
2600 	self->sibling[1].mutex = &self->mutex;
2601 	self->sibling[1].diverge = 0;
2602 	self->sibling[1].prog = &self->root_prog;
2603 	self->sibling[1].num_waits = 1;
2604 	self->sibling[1].metadata = _metadata;
2605 }
2606 
2607 FIXTURE_TEARDOWN(TSYNC)
2608 {
2609 	int sib = 0;
2610 
2611 	if (self->root_prog.filter)
2612 		free(self->root_prog.filter);
2613 	if (self->apply_prog.filter)
2614 		free(self->apply_prog.filter);
2615 
2616 	for ( ; sib < self->sibling_count; ++sib) {
2617 		struct tsync_sibling *s = &self->sibling[sib];
2618 
2619 		if (!s->tid)
2620 			continue;
2621 		/*
2622 		 * If a thread is still running, it may be stuck, so hit
2623 		 * it over the head really hard.
2624 		 */
2625 		pthread_kill(s->tid, 9);
2626 	}
2627 	pthread_mutex_destroy(&self->mutex);
2628 	pthread_cond_destroy(&self->cond);
2629 	sem_destroy(&self->started);
2630 }
2631 
2632 void *tsync_sibling(void *data)
2633 {
2634 	long ret = 0;
2635 	struct tsync_sibling *me = data;
2636 
2637 	me->system_tid = syscall(__NR_gettid);
2638 
2639 	pthread_mutex_lock(me->mutex);
2640 	if (me->diverge) {
2641 		/* Just re-apply the root prog to fork the tree */
2642 		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
2643 				me->prog, 0, 0);
2644 	}
2645 	sem_post(me->started);
2646 	/* Return outside of started so parent notices failures. */
2647 	if (ret) {
2648 		pthread_mutex_unlock(me->mutex);
2649 		return (void *)SIBLING_EXIT_FAILURE;
2650 	}
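	/* Block until the parent has broadcast the condition num_waits times. */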
2651 	do {
2652 		pthread_cond_wait(me->cond, me->mutex);
2653 		me->num_waits = me->num_waits - 1;
2654 	} while (me->num_waits);
2655 	pthread_mutex_unlock(me->mutex);
2656 
2657 	ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
2658 	if (!ret)
2659 		return (void *)SIBLING_EXIT_NEWPRIVS;
2660 	read(-1, NULL, 0);
2661 	return (void *)SIBLING_EXIT_UNKILLED;
2662 }
2663 
2664 void tsync_start_sibling(struct tsync_sibling *sibling)
2665 {
2666 	pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
2667 }
2668 
2669 TEST_F(TSYNC, siblings_fail_prctl)
2670 {
2671 	long ret;
2672 	void *status;
2673 	struct sock_filter filter[] = {
2674 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2675 			offsetof(struct seccomp_data, nr)),
2676 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
2677 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
2678 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2679 	};
2680 	struct sock_fprog prog = {
2681 		.len = (unsigned short)ARRAY_SIZE(filter),
2682 		.filter = filter,
2683 	};
2684 
2685 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2686 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2687 	}
2688 
2689 	/* Check prctl failure detection by requesting sib 0 diverge. */
2690 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2691 	ASSERT_NE(ENOSYS, errno) {
2692 		TH_LOG("Kernel does not support seccomp syscall!");
2693 	}
2694 	ASSERT_EQ(0, ret) {
2695 		TH_LOG("setting filter failed");
2696 	}
2697 
2698 	self->sibling[0].diverge = 1;
2699 	tsync_start_sibling(&self->sibling[0]);
2700 	tsync_start_sibling(&self->sibling[1]);
2701 
2702 	while (self->sibling_count < TSYNC_SIBLINGS) {
2703 		sem_wait(&self->started);
2704 		self->sibling_count++;
2705 	}
2706 
2707 	/* Signal the threads to clean up. */
2708 	pthread_mutex_lock(&self->mutex);
2709 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2710 		TH_LOG("cond broadcast non-zero");
2711 	}
2712 	pthread_mutex_unlock(&self->mutex);
2713 
2714 	/* Ensure diverging sibling failed to call prctl. */
2715 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2716 	EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
2717 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2718 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2719 }
2720 
2721 TEST_F(TSYNC, two_siblings_with_ancestor)
2722 {
2723 	long ret;
2724 	void *status;
2725 
2726 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2727 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2728 	}
2729 
2730 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2731 	ASSERT_NE(ENOSYS, errno) {
2732 		TH_LOG("Kernel does not support seccomp syscall!");
2733 	}
2734 	ASSERT_EQ(0, ret) {
2735 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2736 	}
2737 	tsync_start_sibling(&self->sibling[0]);
2738 	tsync_start_sibling(&self->sibling[1]);
2739 
2740 	while (self->sibling_count < TSYNC_SIBLINGS) {
2741 		sem_wait(&self->started);
2742 		self->sibling_count++;
2743 	}
2744 
2745 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2746 		      &self->apply_prog);
2747 	ASSERT_EQ(0, ret) {
2748 		TH_LOG("Could not install filter on all threads!");
2749 	}
2750 	/* Tell the siblings to test the policy */
2751 	pthread_mutex_lock(&self->mutex);
2752 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2753 		TH_LOG("cond broadcast non-zero");
2754 	}
2755 	pthread_mutex_unlock(&self->mutex);
2756 	/* Ensure they are both killed and don't exit cleanly. */
2757 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2758 	EXPECT_EQ(0x0, (long)status);
2759 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2760 	EXPECT_EQ(0x0, (long)status);
2761 }
2762 
2763 TEST_F(TSYNC, two_sibling_want_nnp)
2764 {
2765 	void *status;
2766 
2767 	/* start siblings before any prctl() operations */
2768 	tsync_start_sibling(&self->sibling[0]);
2769 	tsync_start_sibling(&self->sibling[1]);
2770 	while (self->sibling_count < TSYNC_SIBLINGS) {
2771 		sem_wait(&self->started);
2772 		self->sibling_count++;
2773 	}
2774 
2775 	/* Tell the siblings to test no policy */
2776 	pthread_mutex_lock(&self->mutex);
2777 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2778 		TH_LOG("cond broadcast non-zero");
2779 	}
2780 	pthread_mutex_unlock(&self->mutex);
2781 
2782 	/* Ensure they are both upset about lacking nnp. */
2783 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2784 	EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2785 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2786 	EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2787 }
2788 
2789 TEST_F(TSYNC, two_siblings_with_no_filter)
2790 {
2791 	long ret;
2792 	void *status;
2793 
2794 	/* start siblings before any prctl() operations */
2795 	tsync_start_sibling(&self->sibling[0]);
2796 	tsync_start_sibling(&self->sibling[1]);
2797 	while (self->sibling_count < TSYNC_SIBLINGS) {
2798 		sem_wait(&self->started);
2799 		self->sibling_count++;
2800 	}
2801 
2802 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2803 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2804 	}
2805 
2806 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2807 		      &self->apply_prog);
2808 	ASSERT_NE(ENOSYS, errno) {
2809 		TH_LOG("Kernel does not support seccomp syscall!");
2810 	}
2811 	ASSERT_EQ(0, ret) {
2812 		TH_LOG("Could not install filter on all threads!");
2813 	}
2814 
2815 	/* Tell the siblings to test the policy */
2816 	pthread_mutex_lock(&self->mutex);
2817 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2818 		TH_LOG("cond broadcast non-zero");
2819 	}
2820 	pthread_mutex_unlock(&self->mutex);
2821 
2822 	/* Ensure they are both killed and don't exit cleanly. */
2823 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2824 	EXPECT_EQ(0x0, (long)status);
2825 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2826 	EXPECT_EQ(0x0, (long)status);
2827 }
2828 
2829 TEST_F(TSYNC, two_siblings_with_one_divergence)
2830 {
2831 	long ret;
2832 	void *status;
2833 
2834 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2835 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2836 	}
2837 
2838 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2839 	ASSERT_NE(ENOSYS, errno) {
2840 		TH_LOG("Kernel does not support seccomp syscall!");
2841 	}
2842 	ASSERT_EQ(0, ret) {
2843 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2844 	}
2845 	self->sibling[0].diverge = 1;
2846 	tsync_start_sibling(&self->sibling[0]);
2847 	tsync_start_sibling(&self->sibling[1]);
2848 
2849 	while (self->sibling_count < TSYNC_SIBLINGS) {
2850 		sem_wait(&self->started);
2851 		self->sibling_count++;
2852 	}
2853 
2854 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2855 		      &self->apply_prog);
2856 	ASSERT_EQ(self->sibling[0].system_tid, ret) {
2857 		TH_LOG("Did not fail on diverged sibling.");
2858 	}
2859 
2860 	/* Wake the threads */
2861 	pthread_mutex_lock(&self->mutex);
2862 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2863 		TH_LOG("cond broadcast non-zero");
2864 	}
2865 	pthread_mutex_unlock(&self->mutex);
2866 
2867 	/* Ensure they are both unkilled. */
2868 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2869 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2870 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2871 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2872 }
2873 
2874 /*
2875  * b/147676645
2876  * SECCOMP_FILTER_FLAG_TSYNC_ESRCH not compatible < 5.7
2877  */
2878 #ifndef __ANDROID__
2879 TEST_F(TSYNC, two_siblings_with_one_divergence_no_tid_in_err)
2880 {
2881 	long ret, flags;
2882 	void *status;
2883 
2884 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2885 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2886 	}
2887 
2888 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2889 	ASSERT_NE(ENOSYS, errno) {
2890 		TH_LOG("Kernel does not support seccomp syscall!");
2891 	}
2892 	ASSERT_EQ(0, ret) {
2893 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2894 	}
2895 	self->sibling[0].diverge = 1;
2896 	tsync_start_sibling(&self->sibling[0]);
2897 	tsync_start_sibling(&self->sibling[1]);
2898 
2899 	while (self->sibling_count < TSYNC_SIBLINGS) {
2900 		sem_wait(&self->started);
2901 		self->sibling_count++;
2902 	}
2903 
2904 	flags = SECCOMP_FILTER_FLAG_TSYNC |
2905 		SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
2906 	ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, &self->apply_prog);
2907 	ASSERT_EQ(ESRCH, errno) {
2908 		TH_LOG("Did not return ESRCH for diverged sibling.");
2909 	}
2910 	ASSERT_EQ(-1, ret) {
2911 		TH_LOG("Did not fail on diverged sibling.");
2912 	}
2913 
2914 	/* Wake the threads */
2915 	pthread_mutex_lock(&self->mutex);
2916 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2917 		TH_LOG("cond broadcast non-zero");
2918 	}
2919 	pthread_mutex_unlock(&self->mutex);
2920 
2921 	/* Ensure they are both unkilled. */
2922 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2923 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2924 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2925 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2926 }
2927 #endif
2928 
2929 TEST_F(TSYNC, two_siblings_not_under_filter)
2930 {
2931 	long ret, sib;
2932 	void *status;
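	/* 100ms poll interval used while waiting for sibling threads to die. */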
2933 	struct timespec delay = { .tv_nsec = 100000000 };
2934 
2935 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2936 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2937 	}
2938 
2939 	/*
2940 	 * Sibling 0 will have its own seccomp policy
2941 	 * and Sibling 1 will not be under seccomp at
2942 	 * all. Sibling 1 will enter seccomp and 0
2943 	 * will cause failure.
2944 	 */
2945 	self->sibling[0].diverge = 1;
2946 	tsync_start_sibling(&self->sibling[0]);
2947 	tsync_start_sibling(&self->sibling[1]);
2948 
2949 	while (self->sibling_count < TSYNC_SIBLINGS) {
2950 		sem_wait(&self->started);
2951 		self->sibling_count++;
2952 	}
2953 
2954 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2955 	ASSERT_NE(ENOSYS, errno) {
2956 		TH_LOG("Kernel does not support seccomp syscall!");
2957 	}
2958 	ASSERT_EQ(0, ret) {
2959 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2960 	}
2961 
2962 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2963 		      &self->apply_prog);
2964 	ASSERT_EQ(ret, self->sibling[0].system_tid) {
2965 		TH_LOG("Did not fail on diverged sibling.");
2966 	}
2967 	sib = 1;
2968 	if (ret == self->sibling[0].system_tid)
2969 		sib = 0;
2970 
2971 	pthread_mutex_lock(&self->mutex);
2972 
2973 	/* Increment the other sibling's num_waits so we can clean up
2974 	 * the one we just saw.
2975 	 */
2976 	self->sibling[!sib].num_waits += 1;
2977 
2978 	/* Signal the thread to clean up. */
2979 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2980 		TH_LOG("cond broadcast non-zero");
2981 	}
2982 	pthread_mutex_unlock(&self->mutex);
2983 	PTHREAD_JOIN(self->sibling[sib].tid, &status);
2984 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2985 	/* Poll for actual task death. pthread_join doesn't guarantee it. */
2986 	while (!kill(self->sibling[sib].system_tid, 0))
2987 		nanosleep(&delay, NULL);
2988 	/* Switch to the remaining sibling */
2989 	sib = !sib;
2990 
2991 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2992 		      &self->apply_prog);
2993 	ASSERT_EQ(0, ret) {
2994 		TH_LOG("Expected the remaining sibling to sync");
2995 	}
2996 
2997 	pthread_mutex_lock(&self->mutex);
2998 
2999 	/* If the remaining sibling didn't have a chance to wake up during
3000 	 * the first broadcast, manually reduce its num_waits now.
3001 	 */
3002 	if (self->sibling[sib].num_waits > 1)
3003 		self->sibling[sib].num_waits = 1;
3004 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
3005 		TH_LOG("cond broadcast non-zero");
3006 	}
3007 	pthread_mutex_unlock(&self->mutex);
3008 	PTHREAD_JOIN(self->sibling[sib].tid, &status);
3009 	EXPECT_EQ(0, (long)status);
3010 	/* Poll for actual task death. pthread_join doesn't guarantee it. */
3011 	while (!kill(self->sibling[sib].system_tid, 0))
3012 		nanosleep(&delay, NULL);
3013 
3014 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
3015 		      &self->apply_prog);
3016 	ASSERT_EQ(0, ret);  /* just us chickens */
3017 }
3018 
3019 /* Make sure restarted syscalls are seen directly as "restart_syscall". */
3020 TEST(syscall_restart)
3021 {
3022 	long ret;
3023 	unsigned long msg;
3024 	pid_t child_pid;
3025 	int pipefd[2];
3026 	int status;
3027 	siginfo_t info = { };
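	/*
	 * Allowlist the syscalls the child needs, trace nanosleep and
	 * clock_nanosleep with data 0x100 and restart_syscall with 0x200,
	 * and kill anything else.
	 */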
3028 	struct sock_filter filter[] = {
3029 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
3030 			 offsetof(struct seccomp_data, nr)),
3031 
3032 #ifdef __NR_sigreturn
3033 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 7, 0),
3034 #endif
3035 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 6, 0),
3036 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 5, 0),
3037 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 4, 0),
3038 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 5, 0),
3039 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_clock_nanosleep, 4, 0),
3040 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),
3041 
3042 		/* Allow __NR_write for easy logging. */
3043 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
3044 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
3045 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
3046 		/* The nanosleep jump target. */
3047 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100),
3048 		/* The restart_syscall jump target. */
3049 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200),
3050 	};
3051 	struct sock_fprog prog = {
3052 		.len = (unsigned short)ARRAY_SIZE(filter),
3053 		.filter = filter,
3054 	};
3055 #if defined(__arm__)
3056 	struct utsname utsbuf;
3057 	int arm_version;
3058 #endif
3059 
3060 	ASSERT_EQ(0, pipe(pipefd));
3061 
3062 	child_pid = fork();
3063 	ASSERT_LE(0, child_pid);
3064 	if (child_pid == 0) {
3065 		/* Child uses EXPECT not ASSERT to deliver status correctly. */
3066 		char buf = ' ';
3067 		struct timespec timeout = { };
3068 
3069 		/* Attach parent as tracer and stop. */
3070 		EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
3071 		EXPECT_EQ(0, raise(SIGSTOP));
3072 
3073 		EXPECT_EQ(0, close(pipefd[1]));
3074 
3075 		EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
3076 			TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3077 		}
3078 
3079 		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
3080 		EXPECT_EQ(0, ret) {
3081 			TH_LOG("Failed to install filter!");
3082 		}
3083 
3084 		EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
3085 			TH_LOG("Failed to read() sync from parent");
3086 		}
3087 		EXPECT_EQ('.', buf) {
3088 			TH_LOG("Failed to get sync data from read()");
3089 		}
3090 
3091 		/* Start nanosleep to be interrupted. */
3092 		timeout.tv_sec = 1;
3093 		errno = 0;
3094 		EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
3095 			TH_LOG("Call to nanosleep() failed (errno %d)", errno);
3096 		}
3097 
3098 		/* Read final sync from parent. */
3099 		EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
3100 			TH_LOG("Failed final read() from parent");
3101 		}
3102 		EXPECT_EQ('!', buf) {
3103 			TH_LOG("Failed to get final data from read()");
3104 		}
3105 
3106 		/* Directly report the status of our test harness results. */
3107 		syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
3108 						     : EXIT_FAILURE);
3109 	}
3110 	EXPECT_EQ(0, close(pipefd[0]));
3111 
3112 	/* Attach to child, setup options, and release. */
3113 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
3114 	ASSERT_EQ(true, WIFSTOPPED(status));
3115 	ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
3116 			    PTRACE_O_TRACESECCOMP));
3117 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
3118 	ASSERT_EQ(1, write(pipefd[1], ".", 1));
3119 
3120 	/* Wait for nanosleep() to start. */
3121 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
3122 	ASSERT_EQ(true, WIFSTOPPED(status));
3123 	ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
3124 	ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
3125 	ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
3126 	ASSERT_EQ(0x100, msg);
3127 	ret = get_syscall(_metadata, child_pid);
3128 	EXPECT_TRUE(ret == __NR_nanosleep || ret == __NR_clock_nanosleep);
3129 
3130 	/* Might as well check siginfo for sanity while we're here. */
3131 	ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
3132 	ASSERT_EQ(SIGTRAP, info.si_signo);
3133 	ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
3134 	EXPECT_EQ(0, info.si_errno);
3135 	EXPECT_EQ(getuid(), info.si_uid);
3136 	/* Verify signal delivery came from child (seccomp-triggered). */
3137 	EXPECT_EQ(child_pid, info.si_pid);
3138 
3139 	/* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */
3140 	ASSERT_EQ(0, kill(child_pid, SIGSTOP));
3141 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
3142 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
3143 	ASSERT_EQ(true, WIFSTOPPED(status));
3144 	ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
3145 	ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
3146 	/*
3147 	 * There is no siginfo on SIGSTOP any more, so we can't verify
3148 	 * signal delivery came from parent now (getpid() == info.si_pid).
3149 	 * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com
3150 	 * At least verify the SIGSTOP via PTRACE_GETSIGINFO.
3151 	 */
3152 	EXPECT_EQ(SIGSTOP, info.si_signo);
3153 
3154 	/* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
3155 	ASSERT_EQ(0, kill(child_pid, SIGCONT));
3156 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
3157 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
3158 	ASSERT_EQ(true, WIFSTOPPED(status));
3159 	ASSERT_EQ(SIGCONT, WSTOPSIG(status));
3160 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
3161 
3162 	/* Wait for restart_syscall() to start. */
3163 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
3164 	ASSERT_EQ(true, WIFSTOPPED(status));
3165 	ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
3166 	ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
3167 	ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
3168 
3169 	ASSERT_EQ(0x200, msg);
3170 	ret = get_syscall(_metadata, child_pid);
3171 #if defined(__arm__)
3172 	/*
3173 	 * - native ARM registers do NOT expose true syscall.
3174 	 * - compat ARM registers on ARM64 DO expose true syscall.
3175 	 */
3176 	ASSERT_EQ(0, uname(&utsbuf));
3177 	if (sscanf(utsbuf.machine, "armv%d", &arm_version) == 1 &&
3178 	    arm_version < 8) {
3179 		EXPECT_EQ(__NR_nanosleep, ret);
3180 	} else
3181 #endif
3182 	{
3183 		EXPECT_EQ(__NR_restart_syscall, ret);
3184 	}
3185 
3186 	/* Write again to end test. */
3187 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
3188 	ASSERT_EQ(1, write(pipefd[1], "!", 1));
3189 	EXPECT_EQ(0, close(pipefd[1]));
3190 
3191 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
3192 	if (WIFSIGNALED(status) || WEXITSTATUS(status))
3193 		_metadata->passed = 0;
3194 }
3195 
3196 TEST_SIGNAL(filter_flag_log, SIGSYS)
3197 {
3198 	struct sock_filter allow_filter[] = {
3199 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
3200 	};
3201 	struct sock_filter kill_filter[] = {
3202 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
3203 			offsetof(struct seccomp_data, nr)),
3204 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
3205 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
3206 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
3207 	};
3208 	struct sock_fprog allow_prog = {
3209 		.len = (unsigned short)ARRAY_SIZE(allow_filter),
3210 		.filter = allow_filter,
3211 	};
3212 	struct sock_fprog kill_prog = {
3213 		.len = (unsigned short)ARRAY_SIZE(kill_filter),
3214 		.filter = kill_filter,
3215 	};
3216 	long ret;
3217 	pid_t parent = getppid();
3218 
3219 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3220 	ASSERT_EQ(0, ret);
3221 
3222 	/* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */
3223 	ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG,
3224 		      &allow_prog);
3225 	ASSERT_NE(ENOSYS, errno) {
3226 		TH_LOG("Kernel does not support seccomp syscall!");
3227 	}
3228 	EXPECT_NE(0, ret) {
3229 		TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!");
3230 	}
3231 	EXPECT_EQ(EINVAL, errno) {
3232 		TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!");
3233 	}
3234 
3235 	/* Verify that a simple, permissive filter can be added with no flags */
3236 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
3237 	EXPECT_EQ(0, ret);
3238 
3239 	/* See if the same filter can be added with the FILTER_FLAG_LOG flag */
3240 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
3241 		      &allow_prog);
3242 	ASSERT_NE(EINVAL, errno) {
3243 		TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!");
3244 	}
3245 	EXPECT_EQ(0, ret);
3246 
3247 	/* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */
3248 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
3249 		      &kill_prog);
3250 	EXPECT_EQ(0, ret);
3251 
3252 	EXPECT_EQ(parent, syscall(__NR_getppid));
3253 	/* getpid() should never return. */
3254 	EXPECT_EQ(0, syscall(__NR_getpid));
3255 }
3256 
3257 TEST(get_action_avail)
3258 {
3259 	__u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP,
3260 			    SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
3261 			    SECCOMP_RET_LOG,   SECCOMP_RET_ALLOW };
3262 	__u32 unknown_action = 0x10000000U;
3263 	int i;
3264 	long ret;
3265 
3266 	ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]);
3267 	ASSERT_NE(ENOSYS, errno) {
3268 		TH_LOG("Kernel does not support seccomp syscall!");
3269 	}
3270 	ASSERT_NE(EINVAL, errno) {
3271 		TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!");
3272 	}
3273 	EXPECT_EQ(ret, 0);
3274 
3275 	for (i = 0; i < ARRAY_SIZE(actions); i++) {
3276 		ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]);
3277 		EXPECT_EQ(ret, 0) {
3278 			TH_LOG("Expected action (0x%X) not available!",
3279 			       actions[i]);
3280 		}
3281 	}
3282 
3283 	/* Check that an unknown action is handled properly (EOPNOTSUPP) */
3284 	ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action);
3285 	EXPECT_EQ(ret, -1);
3286 	EXPECT_EQ(errno, EOPNOTSUPP);
3287 }
3288 
3289 /*
3290  * b/147676645
3291  * PTRACE_SECCOMP_GET_METADATA not compatible < 4.16
3292  */
3293 #ifndef __ANDROID__
3294 TEST(get_metadata)
3295 {
3296 	pid_t pid;
3297 	int pipefd[2];
3298 	char buf;
3299 	struct seccomp_metadata md;
3300 	long ret;
3301 
3302 	/* Only real root can get metadata. */
3303 	if (geteuid()) {
3304 		SKIP(return, "get_metadata requires real root");
3305 		return;
3306 	}
3307 
3308 	ASSERT_EQ(0, pipe(pipefd));
3309 
3310 	pid = fork();
3311 	ASSERT_GE(pid, 0);
3312 	if (pid == 0) {
3313 		struct sock_filter filter[] = {
3314 			BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
3315 		};
3316 		struct sock_fprog prog = {
3317 			.len = (unsigned short)ARRAY_SIZE(filter),
3318 			.filter = filter,
3319 		};
3320 
3321 		/* one with log, one without */
3322 		EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER,
3323 				     SECCOMP_FILTER_FLAG_LOG, &prog));
3324 		EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog));
3325 
3326 		EXPECT_EQ(0, close(pipefd[0]));
3327 		ASSERT_EQ(1, write(pipefd[1], "1", 1));
3328 		ASSERT_EQ(0, close(pipefd[1]));
3329 
3330 		while (1)
3331 			sleep(100);
3332 	}
3333 
3334 	ASSERT_EQ(0, close(pipefd[1]));
3335 	ASSERT_EQ(1, read(pipefd[0], &buf, 1));
3336 
3337 	ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid));
3338 	ASSERT_EQ(pid, waitpid(pid, NULL, 0));
3339 
3340 	/* Past here must not use ASSERT or child process is never killed. */
3341 
3342 	md.filter_off = 0;
3343 	errno = 0;
3344 	ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
3345 	EXPECT_EQ(sizeof(md), ret) {
3346 		if (errno == EINVAL)
3347 			SKIP(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)");
3348 	}
3349 
3350 	EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG);
3351 	EXPECT_EQ(md.filter_off, 0);
3352 
3353 	md.filter_off = 1;
3354 	ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
3355 	EXPECT_EQ(sizeof(md), ret);
3356 	EXPECT_EQ(md.flags, 0);
3357 	EXPECT_EQ(md.filter_off, 1);
3358 
3359 skip:
3360 	ASSERT_EQ(0, kill(pid, SIGKILL));
3361 }
3362 #endif
3363 
3364 static int user_notif_syscall(int nr, unsigned int flags)
3365 {
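	/*
	 * Install a filter returning SECCOMP_RET_USER_NOTIF for syscall "nr"
	 * and SECCOMP_RET_ALLOW for everything else; "flags" is passed to
	 * seccomp(), so SECCOMP_FILTER_FLAG_NEW_LISTENER makes the return
	 * value a notification listener fd.
	 */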
3366 	struct sock_filter filter[] = {
3367 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
3368 			offsetof(struct seccomp_data, nr)),
3369 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1),
3370 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF),
3371 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
3372 	};
3373 
3374 	struct sock_fprog prog = {
3375 		.len = (unsigned short)ARRAY_SIZE(filter),
3376 		.filter = filter,
3377 	};
3378 
3379 	return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog);
3380 }
3381 
3382 #define USER_NOTIF_MAGIC INT_MAX
3383 
3384 /*
3385  * b/147676645
3386  * SECCOMP_FILTER_FLAG_NEW_LISTENER not compatible < 5.0
3387  */
3388 #ifndef __ANDROID__
3389 TEST(user_notification_basic)
3390 {
3391 	pid_t pid;
3392 	long ret;
3393 	int status, listener;
3394 	struct seccomp_notif req = {};
3395 	struct seccomp_notif_resp resp = {};
3396 	struct pollfd pollfd;
3397 
3398 	struct sock_filter filter[] = {
3399 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
3400 	};
3401 	struct sock_fprog prog = {
3402 		.len = (unsigned short)ARRAY_SIZE(filter),
3403 		.filter = filter,
3404 	};
3405 
3406 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3407 	ASSERT_EQ(0, ret) {
3408 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3409 	}
3410 
3411 	pid = fork();
3412 	ASSERT_GE(pid, 0);
3413 
3414 	/* Check that we get -ENOSYS with no listener attached */
3415 	if (pid == 0) {
3416 		if (user_notif_syscall(__NR_getppid, 0) < 0)
3417 			exit(1);
3418 		ret = syscall(__NR_getppid);
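		/* Exit 0 only if getppid failed with ENOSYS as expected. */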
3419 		exit(ret >= 0 || errno != ENOSYS);
3420 	}
3421 
3422 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3423 	EXPECT_EQ(true, WIFEXITED(status));
3424 	EXPECT_EQ(0, WEXITSTATUS(status));
3425 
3426 	/* Add some no-op filters for grins. */
3427 	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3428 	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3429 	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3430 	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3431 
3432 	/* Check that the basic notification machinery works */
3433 	listener = user_notif_syscall(__NR_getppid,
3434 				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3435 	ASSERT_GE(listener, 0);
3436 
3437 	/* Installing a second listener in the chain should fail with EBUSY. */
3438 	EXPECT_EQ(user_notif_syscall(__NR_getppid,
3439 				     SECCOMP_FILTER_FLAG_NEW_LISTENER),
3440 		  -1);
3441 	EXPECT_EQ(errno, EBUSY);
3442 
3443 	pid = fork();
3444 	ASSERT_GE(pid, 0);
3445 
3446 	if (pid == 0) {
3447 		ret = syscall(__NR_getppid);
3448 		exit(ret != USER_NOTIF_MAGIC);
3449 	}
3450 
3451 	pollfd.fd = listener;
3452 	pollfd.events = POLLIN | POLLOUT;
3453 
3454 	EXPECT_GT(poll(&pollfd, 1, -1), 0);
3455 	EXPECT_EQ(pollfd.revents, POLLIN);
3456 
3457 	/* Test that we can't pass garbage to the kernel. */
3458 	memset(&req, 0, sizeof(req));
3459 	req.pid = -1;
3460 	errno = 0;
3461 	ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req);
3462 	EXPECT_EQ(-1, ret);
3463 	EXPECT_EQ(EINVAL, errno);
3464 
3465 	if (ret) {
3466 		req.pid = 0;
3467 		EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3468 	}
3469 
3470 	pollfd.fd = listener;
3471 	pollfd.events = POLLIN | POLLOUT;
3472 
3473 	EXPECT_GT(poll(&pollfd, 1, -1), 0);
3474 	EXPECT_EQ(pollfd.revents, POLLOUT);
3475 
3476 	EXPECT_EQ(req.data.nr,  __NR_getppid);
3477 
3478 	resp.id = req.id;
3479 	resp.error = 0;
3480 	resp.val = USER_NOTIF_MAGIC;
3481 
3482 	/* Check that the kernel enforces resp.flags == 0. */
3483 	resp.flags = 1;
3484 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3485 	EXPECT_EQ(errno, EINVAL);
3486 
3487 	resp.flags = 0;
3488 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3489 
3490 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3491 	EXPECT_EQ(true, WIFEXITED(status));
3492 	EXPECT_EQ(0, WEXITSTATUS(status));
3493 }
3494 #endif
3495 
3496 /*
3497  * b/147676645
3498  * SECCOMP_FILTER_FLAG_NEW_LISTENER not compatible < 5.0
3499  */
3500 #ifndef __ANDROID__
3501 TEST(user_notification_with_tsync)
3502 {
3503 	int ret;
3504 	unsigned int flags;
3505 
3506 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3507 	ASSERT_EQ(0, ret) {
3508 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3509 	}
3510 
3511 	/* NEW_LISTENER and TSYNC used to be mutually exclusive. */
3512 	flags = SECCOMP_FILTER_FLAG_NEW_LISTENER |
3513 		SECCOMP_FILTER_FLAG_TSYNC;
3514 	ASSERT_EQ(-1, user_notif_syscall(__NR_getppid, flags));
3515 	ASSERT_EQ(EINVAL, errno);
3516 
3517 	/* but now they're not */
3518 	flags |= SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
3519 	ret = user_notif_syscall(__NR_getppid, flags);
3520 	close(ret);
3521 	ASSERT_LE(0, ret);
3522 }
3523 #endif
3524 
3525 /*
3526  * b/147676645
3527  * SECCOMP_FILTER_FLAG_NEW_LISTENER not compatible < 5.0
3528  */
3529 #ifndef __ANDROID__
3530 TEST(user_notification_kill_in_middle)
3531 {
3532 	pid_t pid;
3533 	long ret;
3534 	int listener;
3535 	struct seccomp_notif req = {};
3536 	struct seccomp_notif_resp resp = {};
3537 
3538 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3539 	ASSERT_EQ(0, ret) {
3540 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3541 	}
3542 
3543 	listener = user_notif_syscall(__NR_getppid,
3544 				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3545 	ASSERT_GE(listener, 0);
3546 
3547 	/*
3548 	 * Check that nothing bad happens when we kill the task in the middle
3549 	 * of a syscall.
3550 	 */
3551 	pid = fork();
3552 	ASSERT_GE(pid, 0);
3553 
3554 	if (pid == 0) {
3555 		ret = syscall(__NR_getppid);
3556 		exit(ret != USER_NOTIF_MAGIC);
3557 	}
3558 
3559 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3560 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), 0);
3561 
3562 	EXPECT_EQ(kill(pid, SIGKILL), 0);
3563 	EXPECT_EQ(waitpid(pid, NULL, 0), pid);
3564 
3565 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), -1);
3566 
3567 	resp.id = req.id;
3568 	ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp);
3569 	EXPECT_EQ(ret, -1);
3570 	EXPECT_EQ(errno, ENOENT);
3571 }
3572 #endif
3573 
3574 static int handled = -1;
3575 
3576 static void signal_handler(int signal)
3577 {
3578 	if (write(handled, "c", 1) != 1)
3579 		perror("write from signal");
3580 }
3581 
3582 /*
3583  * b/147676645
3584  * SECCOMP_FILTER_FLAG_NEW_LISTENER not compatible < 5.0
3585  */
3586 #ifndef __ANDROID__
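/*
 * Check interaction with signals: a non-fatal signal delivered while the
 * supervisor holds a notification invalidates that notification (responding
 * fails with ENOENT), and the restarted syscall generates a new notification
 * that can be answered with -ERESTARTSYS.
 */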
3587 TEST(user_notification_signal)
3588 {
3589 	pid_t pid;
3590 	long ret;
3591 	int status, listener, sk_pair[2];
3592 	struct seccomp_notif req = {};
3593 	struct seccomp_notif_resp resp = {};
3594 	char c;
3595 
3596 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3597 	ASSERT_EQ(0, ret) {
3598 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3599 	}
3600 
3601 	ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
3602 
3603 	listener = user_notif_syscall(__NR_gettid,
3604 				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3605 	ASSERT_GE(listener, 0);
3606 
3607 	pid = fork();
3608 	ASSERT_GE(pid, 0);
3609 
3610 	if (pid == 0) {
3611 		close(sk_pair[0]);
3612 		handled = sk_pair[1];
3613 		if (signal(SIGUSR1, signal_handler) == SIG_ERR) {
3614 			perror("signal");
3615 			exit(1);
3616 		}
3617 		/*
3618 		 * ERESTARTSYS behavior is a bit hard to test, because we need
3619 		 * to rely on a signal that has not yet been handled. Let's at
3620 		 * least check that the error code gets propagated through, and
3621 		 * hope that it doesn't break when there is actually a signal :)
3622 		 */
3623 		ret = syscall(__NR_gettid);
3624 		exit(!(ret == -1 && errno == 512));
3625 	}
3626 
3627 	close(sk_pair[1]);
3628 
3629 	memset(&req, 0, sizeof(req));
3630 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3631 
3632 	EXPECT_EQ(kill(pid, SIGUSR1), 0);
3633 
3634 	/*
3635 	 * Make sure the signal really is delivered, which means we're not
3636 	 * stuck in the user notification code any more and the notification
3637 	 * should be dead.
3638 	 */
3639 	EXPECT_EQ(read(sk_pair[0], &c, 1), 1);
3640 
3641 	resp.id = req.id;
3642 	resp.error = -EPERM;
3643 	resp.val = 0;
3644 
3645 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3646 	EXPECT_EQ(errno, ENOENT);
3647 
3648 	memset(&req, 0, sizeof(req));
3649 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3650 
3651 	resp.id = req.id;
3652 	resp.error = -512; /* -ERESTARTSYS */
3653 	resp.val = 0;
3654 
3655 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3656 
3657 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3658 	EXPECT_EQ(true, WIFEXITED(status));
3659 	EXPECT_EQ(0, WEXITSTATUS(status));
3660 }
3661 #endif
3662 
3663 /*
3664  * b/147676645
3665  * SECCOMP_FILTER_FLAG_NEW_LISTENER not compatible < 5.0
3666  */
3667 #ifndef __ANDROID__
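/*
 * Check that an intercepted syscall fails with ENOSYS once the last
 * listener fd for the filter has been closed.
 */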
3668 TEST(user_notification_closed_listener)
3669 {
3670 	pid_t pid;
3671 	long ret;
3672 	int status, listener;
3673 
3674 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3675 	ASSERT_EQ(0, ret) {
3676 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3677 	}
3678 
3679 	listener = user_notif_syscall(__NR_getppid,
3680 				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3681 	ASSERT_GE(listener, 0);
3682 
3683 	/*
3684 	 * Check that we get an ENOSYS when the listener is closed.
3685 	 */
3686 	pid = fork();
3687 	ASSERT_GE(pid, 0);
3688 	if (pid == 0) {
3689 		close(listener);
3690 		ret = syscall(__NR_getppid);
3691 		exit(ret != -1 || errno != ENOSYS);
3692 	}
3693 
3694 	close(listener);
3695 
3696 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3697 	EXPECT_EQ(true, WIFEXITED(status));
3698 	EXPECT_EQ(0, WEXITSTATUS(status));
3699 }
3700 #endif
3701 
3702 /*
3703  * b/147676645
3704  * SECCOMP_FILTER_FLAG_NEW_LISTENER not compatible < 5.0
3705  * unshare(CLONE_NEWUSER) returns EINVAL with Android
3706  * unshare(CLONE_NEWPID) returns EINVAL with Android
3707  */
3708 #ifndef __ANDROID__
3709 /*
3710  * Check that a pid in a child namespace still shows up as valid in ours.
3711  */
3712 TEST(user_notification_child_pid_ns)
3713 {
3714 	pid_t pid;
3715 	int status, listener;
3716 	struct seccomp_notif req = {};
3717 	struct seccomp_notif_resp resp = {};
3718 
3719 	ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0) {
3720 		if (errno == EINVAL)
3721 			SKIP(return, "kernel missing CLONE_NEWUSER support");
3722 	};
3723 
3724 	listener = user_notif_syscall(__NR_getppid,
3725 				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3726 	ASSERT_GE(listener, 0);
3727 
3728 	pid = fork();
3729 	ASSERT_GE(pid, 0);
3730 
3731 	if (pid == 0)
3732 		exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3733 
3734 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3735 	EXPECT_EQ(req.pid, pid);
3736 
3737 	resp.id = req.id;
3738 	resp.error = 0;
3739 	resp.val = USER_NOTIF_MAGIC;
3740 
3741 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3742 
3743 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3744 	EXPECT_EQ(true, WIFEXITED(status));
3745 	EXPECT_EQ(0, WEXITSTATUS(status));
3746 	close(listener);
3747 }
3748 #endif
3749 
3750 /*
3751  * b/147676645
3752  * SECCOMP_FILTER_FLAG_NEW_LISTENER not compatible < 5.0
3753  * unshare(CLONE_NEWPID) returns EINVAL with Android
3754  */
3755 #ifndef __ANDROID__
3756 /*
3757  * Check that a pid in a sibling (i.e. unrelated) namespace shows up as 0, i.e.
3758  * invalid.
3759  */
3760 TEST(user_notification_sibling_pid_ns)
3761 {
3762 	pid_t pid, pid2;
3763 	int status, listener;
3764 	struct seccomp_notif req = {};
3765 	struct seccomp_notif_resp resp = {};
3766 
3767 	ASSERT_EQ(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), 0) {
3768 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3769 	}
3770 
3771 	listener = user_notif_syscall(__NR_getppid,
3772 				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3773 	ASSERT_GE(listener, 0);
3774 
3775 	pid = fork();
3776 	ASSERT_GE(pid, 0);
3777 
3778 	if (pid == 0) {
3779 		ASSERT_EQ(unshare(CLONE_NEWPID), 0);
3780 
3781 		pid2 = fork();
3782 		ASSERT_GE(pid2, 0);
3783 
3784 		if (pid2 == 0)
3785 			exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3786 
3787 		EXPECT_EQ(waitpid(pid2, &status, 0), pid2);
3788 		EXPECT_EQ(true, WIFEXITED(status));
3789 		EXPECT_EQ(0, WEXITSTATUS(status));
3790 		exit(WEXITSTATUS(status));
3791 	}
3792 
3793 	/* Create the sibling ns, and sibling in it. */
3794 	ASSERT_EQ(unshare(CLONE_NEWPID), 0) {
3795 		if (errno == EPERM)
3796 			SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN");
3797 	}
3798 	ASSERT_EQ(errno, 0);
3799 
3800 	pid2 = fork();
3801 	ASSERT_GE(pid2, 0);
3802 
3803 	if (pid2 == 0) {
3804 		ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3805 		/*
3806 		 * The pid should be 0, i.e. the task is in some namespace that
3807 		 * we can't "see".
3808 		 */
3809 		EXPECT_EQ(req.pid, 0);
3810 
3811 		resp.id = req.id;
3812 		resp.error = 0;
3813 		resp.val = USER_NOTIF_MAGIC;
3814 
3815 		ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3816 		exit(0);
3817 	}
3818 
3819 	close(listener);
3820 
3821 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3822 	EXPECT_EQ(true, WIFEXITED(status));
3823 	EXPECT_EQ(0, WEXITSTATUS(status));
3824 
3825 	EXPECT_EQ(waitpid(pid2, &status, 0), pid2);
3826 	EXPECT_EQ(true, WIFEXITED(status));
3827 	EXPECT_EQ(0, WEXITSTATUS(status));
3828 }
3829 #endif
3830 
3831 /*
3832  * b/147676645
3833  * SECCOMP_FILTER_FLAG_NEW_LISTENER not compatible < 5.0
3834  * unshare(CLONE_NEWUSER) returns EINVAL with Android
3835  */
3836 #ifndef __ANDROID__
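/*
 * Check that a faulting SECCOMP_IOCTL_NOTIF_RECV (NULL argument) returns
 * EFAULT without consuming the pending notification.
 */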
3837 TEST(user_notification_fault_recv)
3838 {
3839 	pid_t pid;
3840 	int status, listener;
3841 	struct seccomp_notif req = {};
3842 	struct seccomp_notif_resp resp = {};
3843 
3844 	ASSERT_EQ(unshare(CLONE_NEWUSER), 0) {
3845 		if (errno == EINVAL)
3846 			SKIP(return, "kernel missing CLONE_NEWUSER support");
3847 	}
3848 
3849 	listener = user_notif_syscall(__NR_getppid,
3850 				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3851 	ASSERT_GE(listener, 0);
3852 
3853 	pid = fork();
3854 	ASSERT_GE(pid, 0);
3855 
3856 	if (pid == 0)
3857 		exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3858 
3859 	/* Do a bad recv() */
3860 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, NULL), -1);
3861 	EXPECT_EQ(errno, EFAULT);
3862 
3863 	/* We should still be able to receive this notification, though. */
3864 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3865 	EXPECT_EQ(req.pid, pid);
3866 
3867 	resp.id = req.id;
3868 	resp.error = 0;
3869 	resp.val = USER_NOTIF_MAGIC;
3870 
3871 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3872 
3873 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3874 	EXPECT_EQ(true, WIFEXITED(status));
3875 	EXPECT_EQ(0, WEXITSTATUS(status));
3876 }
3877 #endif
3878 
3879 /*
3880  * b/147676645
3881  * SECCOMP_GET_NOTIF_SIZES not compatible < 5.0
3882  */
3883 #ifndef __ANDROID__
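/*
 * Check that SECCOMP_GET_NOTIF_SIZES reports sizes matching the structures
 * this test was built against.
 */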
3884 TEST(seccomp_get_notif_sizes)
3885 {
3886 	struct seccomp_notif_sizes sizes;
3887 
3888 	ASSERT_EQ(seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes), 0);
3889 	EXPECT_EQ(sizes.seccomp_notif, sizeof(struct seccomp_notif));
3890 	EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp));
3891 }
3892 #endif
3893 
3894 /*
3895  * b/147676645
3896  * SECCOMP_FILTER_FLAG_NEW_LISTENER not compatible < 5.0
3897  */
3898 #ifndef __ANDROID__
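/*
 * Check SECCOMP_USER_NOTIF_FLAG_CONTINUE: error and val must be zero when
 * the flag is set, and the continued dup() then executes normally in the
 * target.
 */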
3899 TEST(user_notification_continue)
3900 {
3901 	pid_t pid;
3902 	long ret;
3903 	int status, listener;
3904 	struct seccomp_notif req = {};
3905 	struct seccomp_notif_resp resp = {};
3906 	struct pollfd pollfd;
3907 
3908 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3909 	ASSERT_EQ(0, ret) {
3910 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3911 	}
3912 
3913 	listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
3914 	ASSERT_GE(listener, 0);
3915 
3916 	pid = fork();
3917 	ASSERT_GE(pid, 0);
3918 
3919 	if (pid == 0) {
3920 		int dup_fd, pipe_fds[2];
3921 		pid_t self;
3922 
3923 		ASSERT_GE(pipe(pipe_fds), 0);
3924 
3925 		dup_fd = dup(pipe_fds[0]);
3926 		ASSERT_GE(dup_fd, 0);
3927 		EXPECT_NE(pipe_fds[0], dup_fd);
3928 
3929 		self = getpid();
3930 		ASSERT_EQ(filecmp(self, self, pipe_fds[0], dup_fd), 0);
3931 		exit(0);
3932 	}
3933 
3934 	pollfd.fd = listener;
3935 	pollfd.events = POLLIN | POLLOUT;
3936 
3937 	EXPECT_GT(poll(&pollfd, 1, -1), 0);
3938 	EXPECT_EQ(pollfd.revents, POLLIN);
3939 
3940 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3941 
3942 	pollfd.fd = listener;
3943 	pollfd.events = POLLIN | POLLOUT;
3944 
3945 	EXPECT_GT(poll(&pollfd, 1, -1), 0);
3946 	EXPECT_EQ(pollfd.revents, POLLOUT);
3947 
3948 	EXPECT_EQ(req.data.nr, __NR_dup);
3949 
3950 	resp.id = req.id;
3951 	resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE;
3952 
3953 	/*
3954 	 * Verify that setting SECCOMP_USER_NOTIF_FLAG_CONTINUE requires the
3955 	 * other response fields (error and val) to be 0.
3956 	 */
3957 	resp.error = 0;
3958 	resp.val = USER_NOTIF_MAGIC;
3959 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3960 	EXPECT_EQ(errno, EINVAL);
3961 
3962 	resp.error = USER_NOTIF_MAGIC;
3963 	resp.val = 0;
3964 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3965 	EXPECT_EQ(errno, EINVAL);
3966 
3967 	resp.error = 0;
3968 	resp.val = 0;
3969 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0) {
3970 		if (errno == EINVAL)
3971 			SKIP(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE");
3972 	}
3973 
3974 skip:
3975 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3976 	EXPECT_EQ(true, WIFEXITED(status));
3977 	EXPECT_EQ(0, WEXITSTATUS(status)) {
3978 		if (WEXITSTATUS(status) == 2) {
3979 			SKIP(return, "Kernel does not support kcmp() syscall");
3980 			return;
3981 		}
3982 	}
3983 }
3984 #endif
3985 
3986 /*
3987  * b/147676645
3988  * SECCOMP_FILTER_FLAG_NEW_LISTENER not compatible < 5.0
3989  */
3990 #ifndef __ANDROID__
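/*
 * Check that the listener fd reports POLLHUP once the task that installed
 * the filter has exited and the filter is no longer in use.
 */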
3991 TEST(user_notification_filter_empty)
3992 {
3993 	pid_t pid;
3994 	long ret;
3995 	int status;
3996 	struct pollfd pollfd;
3997 	struct __clone_args args = {
3998 		.flags = CLONE_FILES,
3999 		.exit_signal = SIGCHLD,
4000 	};
4001 
4002 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
4003 	ASSERT_EQ(0, ret) {
4004 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
4005 	}
4006 
4007 	pid = sys_clone3(&args, sizeof(args));
4008 	ASSERT_GE(pid, 0);
4009 
4010 	if (pid == 0) {
4011 		int listener;
4012 
4013 		listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER);
4014 		if (listener < 0)
4015 			_exit(EXIT_FAILURE);
4016 
4017 		if (dup2(listener, 200) != 200)
4018 			_exit(EXIT_FAILURE);
4019 
4020 		close(listener);
4021 
4022 		_exit(EXIT_SUCCESS);
4023 	}
4024 
4025 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
4026 	EXPECT_EQ(true, WIFEXITED(status));
4027 	EXPECT_EQ(0, WEXITSTATUS(status));
4028 
4029 	/*
4030 	 * The seccomp filter has become unused so we should be notified once
4031 	 * the kernel gets around to cleaning up the task struct.
4032 	 */
4033 	pollfd.fd = 200;
4034 	pollfd.events = POLLHUP;
4035 
4036 	EXPECT_GT(poll(&pollfd, 1, 2000), 0);
4037 	EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0);
4038 }
4039 #endif
4040 
4041 /*
4042  * b/147676645
4043  * SECCOMP_FILTER_FLAG_NEW_LISTENER not compatible < 5.0
4044  */
4045 #ifndef __ANDROID__
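/*
 * Same check as above, but with the filter owner creating threads and
 * child processes before exiting.
 */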
4046 static void *do_thread(void *data)
4047 {
4048 	return NULL;
4049 }
4050 
4051 TEST(user_notification_filter_empty_threaded)
4052 {
4053 	pid_t pid;
4054 	long ret;
4055 	int status;
4056 	struct pollfd pollfd;
4057 	struct __clone_args args = {
4058 		.flags = CLONE_FILES,
4059 		.exit_signal = SIGCHLD,
4060 	};
4061 
4062 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
4063 	ASSERT_EQ(0, ret) {
4064 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
4065 	}
4066 
4067 	pid = sys_clone3(&args, sizeof(args));
4068 	ASSERT_GE(pid, 0);
4069 
4070 	if (pid == 0) {
4071 		pid_t pid1, pid2;
4072 		int listener, status;
4073 		pthread_t thread;
4074 
4075 		listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
4076 		if (listener < 0)
4077 			_exit(EXIT_FAILURE);
4078 
4079 		if (dup2(listener, 200) != 200)
4080 			_exit(EXIT_FAILURE);
4081 
4082 		close(listener);
4083 
4084 		pid1 = fork();
4085 		if (pid1 < 0)
4086 			_exit(EXIT_FAILURE);
4087 
4088 		if (pid1 == 0)
4089 			_exit(EXIT_SUCCESS);
4090 
4091 		pid2 = fork();
4092 		if (pid2 < 0)
4093 			_exit(EXIT_FAILURE);
4094 
4095 		if (pid2 == 0)
4096 			_exit(EXIT_SUCCESS);
4097 
4098 		if (pthread_create(&thread, NULL, do_thread, NULL) ||
4099 		    pthread_join(thread, NULL))
4100 			_exit(EXIT_FAILURE);
4101 
4102 		if (pthread_create(&thread, NULL, do_thread, NULL) ||
4103 		    pthread_join(thread, NULL))
4104 			_exit(EXIT_FAILURE);
4105 
4106 		if (waitpid(pid1, &status, 0) != pid1 || !WIFEXITED(status) ||
4107 		    WEXITSTATUS(status))
4108 			_exit(EXIT_FAILURE);
4109 
4110 		if (waitpid(pid2, &status, 0) != pid2 || !WIFEXITED(status) ||
4111 		    WEXITSTATUS(status))
4112 			_exit(EXIT_FAILURE);
4113 
4114 		exit(EXIT_SUCCESS);
4115 	}
4116 
4117 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
4118 	EXPECT_EQ(true, WIFEXITED(status));
4119 	EXPECT_EQ(0, WEXITSTATUS(status));
4120 
4121 	/*
4122 	 * The seccomp filter has become unused so we should be notified once
4123 	 * the kernel gets around to cleaning up the task struct.
4124 	 */
4125 	pollfd.fd = 200;
4126 	pollfd.events = POLLHUP;
4127 
4128 	EXPECT_GT(poll(&pollfd, 1, 2000), 0);
4129 	EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0);
4130 }
4131 #endif
4132 
4133 /*
4134  * b/147676645
4135  * SECCOMP_ADDFD_FLAG_SEND not compatible < 5.14
4136  * SECCOMP_IOCTL_NOTIF_ADDFD not compatible < 5.9
4137  * SECCOMP_FILTER_FLAG_NEW_LISTENER not compatible < 5.0
4138  */
4139 #ifndef __ANDROID__
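/*
 * Exercise SECCOMP_IOCTL_NOTIF_ADDFD: argument validation, installing fds
 * at kernel-chosen and caller-chosen numbers, and the atomic addfd + send
 * path via SECCOMP_ADDFD_FLAG_SEND.
 */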
4140 TEST(user_notification_addfd)
4141 {
4142 	pid_t pid;
4143 	long ret;
4144 	int status, listener, memfd, fd, nextfd;
4145 	struct seccomp_notif_addfd addfd = {};
4146 	struct seccomp_notif_addfd_small small = {};
4147 	struct seccomp_notif_addfd_big big = {};
4148 	struct seccomp_notif req = {};
4149 	struct seccomp_notif_resp resp = {};
4150 	/* 100 ms */
4151 	struct timespec delay = { .tv_nsec = 100000000 };
4152 
4153 	/* There may be arbitrary already-open fds at test start. */
4154 	memfd = memfd_create("test", 0);
4155 	ASSERT_GE(memfd, 0);
4156 	nextfd = memfd + 1;
4157 
4158 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
4159 	ASSERT_EQ(0, ret) {
4160 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
4161 	}
4162 
4163 	/* fd: 4 */
4164 	/* Check that the basic notification machinery works */
4165 	listener = user_notif_syscall(__NR_getppid,
4166 				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
4167 	ASSERT_EQ(listener, nextfd++);
4168 
4169 	pid = fork();
4170 	ASSERT_GE(pid, 0);
4171 
4172 	if (pid == 0) {
4173 		/* fds will be added and this value is expected */
4174 		if (syscall(__NR_getppid) != USER_NOTIF_MAGIC)
4175 			exit(1);
4176 
4177 		/* Atomic addfd+send is received here. Check it is a valid fd */
4178 		if (fcntl(syscall(__NR_getppid), F_GETFD) == -1)
4179 			exit(1);
4180 
4181 		exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
4182 	}
4183 
4184 	ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
4185 
4186 	addfd.srcfd = memfd;
4187 	addfd.newfd = 0;
4188 	addfd.id = req.id;
4189 	addfd.flags = 0x0;
4190 
4191 	/* Verify bad newfd_flags cannot be set */
4192 	addfd.newfd_flags = ~O_CLOEXEC;
4193 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
4194 	EXPECT_EQ(errno, EINVAL);
4195 	addfd.newfd_flags = O_CLOEXEC;
4196 
4197 	/* Verify bad flags cannot be set */
4198 	addfd.flags = 0xff;
4199 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
4200 	EXPECT_EQ(errno, EINVAL);
4201 	addfd.flags = 0;
4202 
4203 	/* Verify that remote_fd cannot be set without setting flags */
4204 	addfd.newfd = 1;
4205 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
4206 	EXPECT_EQ(errno, EINVAL);
4207 	addfd.newfd = 0;
4208 
4209 	/* Verify small size cannot be set */
4210 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_SMALL, &small), -1);
4211 	EXPECT_EQ(errno, EINVAL);
4212 
4213 	/* Verify the kernel rejects bits set in the unknown trailing buffer area */
4214 	memset(&big, 0xAA, sizeof(big));
4215 	big.addfd = addfd;
4216 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big), -1);
4217 	EXPECT_EQ(errno, E2BIG);
4218 
4219 
4220 	/* Verify we can set an arbitrary remote fd */
4221 	fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
4222 	EXPECT_EQ(fd, nextfd++);
4223 	EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
4224 
4225 	/* Verify we can set an arbitrary remote fd with large size */
4226 	memset(&big, 0x0, sizeof(big));
4227 	big.addfd = addfd;
4228 	fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big);
4229 	EXPECT_EQ(fd, nextfd++);
4230 
4231 	/* Verify we can set a specific remote fd */
4232 	addfd.newfd = 42;
4233 	addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
4234 	fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
4235 	EXPECT_EQ(fd, 42);
4236 	EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
4237 
4238 	/* Resume syscall */
4239 	resp.id = req.id;
4240 	resp.error = 0;
4241 	resp.val = USER_NOTIF_MAGIC;
4242 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
4243 
4244 	/*
4245 	 * This sets the ID of the ADD FD to the last request plus 1. The
4246 	 * notification ID increments by 1 per notification.
4247 	 */
4248 	addfd.id = req.id + 1;
4249 
4250 	/* This spins until the underlying notification is generated */
4251 	while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) == -1 &&
4252 	       errno != EINPROGRESS)
4253 		nanosleep(&delay, NULL);
4254 
4255 	memset(&req, 0, sizeof(req));
4256 	ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
4257 	ASSERT_EQ(addfd.id, req.id);
4258 
4259 	/* Verify we can do an atomic addfd and send */
4260 	addfd.newfd = 0;
4261 	addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
4262 	fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
4263 	/*
4264 	 * Child has earlier "low" fds and now 42, so we expect the next
4265 	 * lowest available fd to be assigned here.
4266 	 */
4267 	EXPECT_EQ(fd, nextfd++);
4268 	ASSERT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
4269 
4270 	/*
4271 	 * This sets the ID of the ADD FD to the last request plus 1. The
4272 	 * notification ID increments by 1 per notification.
4273 	 */
4274 	addfd.id = req.id + 1;
4275 
4276 	/* This spins until the underlying notification is generated */
4277 	while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) == -1 &&
4278 	       errno != EINPROGRESS)
4279 		nanosleep(&delay, NULL);
4280 
4281 	memset(&req, 0, sizeof(req));
4282 	ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
4283 	ASSERT_EQ(addfd.id, req.id);
4284 
4285 	resp.id = req.id;
4286 	resp.error = 0;
4287 	resp.val = USER_NOTIF_MAGIC;
4288 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
4289 
4290 	/* Wait for child to finish. */
4291 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
4292 	EXPECT_EQ(true, WIFEXITED(status));
4293 	EXPECT_EQ(0, WEXITSTATUS(status));
4294 
4295 	close(memfd);
4296 }
4297 #endif
4298 
4299 /*
4300  * b/147676645
4301  * SECCOMP_ADDFD_FLAG_SEND not compatible < 5.14
4302  * SECCOMP_IOCTL_NOTIF_ADDFD not compatible < 5.9
4303  * SECCOMP_FILTER_FLAG_NEW_LISTENER not compatible < 5.0
4304  */
4305 #ifndef __ANDROID__
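/*
 * Check that SECCOMP_IOCTL_NOTIF_ADDFD honours the target's RLIMIT_NOFILE:
 * adding an fd fails with EMFILE, and SECCOMP_ADDFD_FLAG_SETFD above the
 * limit fails with EBADF.
 */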
4306 TEST(user_notification_addfd_rlimit)
4307 {
4308 	pid_t pid;
4309 	long ret;
4310 	int status, listener, memfd;
4311 	struct seccomp_notif_addfd addfd = {};
4312 	struct seccomp_notif req = {};
4313 	struct seccomp_notif_resp resp = {};
4314 	const struct rlimit lim = {
4315 		.rlim_cur	= 0,
4316 		.rlim_max	= 0,
4317 	};
4318 
4319 	memfd = memfd_create("test", 0);
4320 	ASSERT_GE(memfd, 0);
4321 
4322 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
4323 	ASSERT_EQ(0, ret) {
4324 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
4325 	}
4326 
4327 	/* Check that the basic notification machinery works */
4328 	listener = user_notif_syscall(__NR_getppid,
4329 				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
4330 	ASSERT_GE(listener, 0);
4331 
4332 	pid = fork();
4333 	ASSERT_GE(pid, 0);
4334 
4335 	if (pid == 0)
4336 		exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
4337 
4338 
4339 	ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
4340 
4341 	ASSERT_EQ(prlimit(pid, RLIMIT_NOFILE, &lim, NULL), 0);
4342 
4343 	addfd.srcfd = memfd;
4344 	addfd.newfd_flags = O_CLOEXEC;
4345 	addfd.newfd = 0;
4346 	addfd.id = req.id;
4347 	addfd.flags = 0;
4348 
4349 	/* Should probably spot check /proc/sys/fs/file-nr */
4350 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
4351 	EXPECT_EQ(errno, EMFILE);
4352 
4353 	addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
4354 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
4355 	EXPECT_EQ(errno, EMFILE);
4356 
4357 	addfd.newfd = 100;
4358 	addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
4359 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
4360 	EXPECT_EQ(errno, EBADF);
4361 
4362 	resp.id = req.id;
4363 	resp.error = 0;
4364 	resp.val = USER_NOTIF_MAGIC;
4365 
4366 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
4367 
4368 	/* Wait for child to finish. */
4369 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
4370 	EXPECT_EQ(true, WIFEXITED(status));
4371 	EXPECT_EQ(0, WEXITSTATUS(status));
4372 
4373 	close(memfd);
4374 }
4375 #endif
4376 
4377 /* Make sure PTRACE_O_SUSPEND_SECCOMP requires CAP_SYS_ADMIN. */
4378 FIXTURE(O_SUSPEND_SECCOMP) {
4379 	pid_t pid;
4380 };
4381 
4382 FIXTURE_SETUP(O_SUSPEND_SECCOMP)
4383 {
4384 	ERRNO_FILTER(block_read, E2BIG);
4385 	cap_value_t cap_list[] = { CAP_SYS_ADMIN };
4386 	cap_t caps;
4387 
4388 	self->pid = 0;
4389 
4390 	/* make sure we don't have CAP_SYS_ADMIN */
4391 	caps = cap_get_proc();
4392 	ASSERT_NE(NULL, caps);
4393 	ASSERT_EQ(0, cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_list, CAP_CLEAR));
4394 	ASSERT_EQ(0, cap_set_proc(caps));
4395 	cap_free(caps);
4396 
4397 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
4398 	ASSERT_EQ(0, prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_block_read));
4399 
4400 	self->pid = fork();
4401 	ASSERT_GE(self->pid, 0);
4402 
4403 	if (self->pid == 0) {
4404 		while (1)
4405 			pause();
4406 		_exit(127);
4407 	}
4408 }
4409 
4410 FIXTURE_TEARDOWN(O_SUSPEND_SECCOMP)
4411 {
4412 	if (self->pid)
4413 		kill(self->pid, SIGKILL);
4414 }
4415 
4416 TEST_F(O_SUSPEND_SECCOMP, setoptions)
4417 {
4418 	int wstatus;
4419 
4420 	ASSERT_EQ(0, ptrace(PTRACE_ATTACH, self->pid, NULL, 0));
4421 	ASSERT_EQ(self->pid, wait(&wstatus));
4422 	ASSERT_EQ(-1, ptrace(PTRACE_SETOPTIONS, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP));
4423 	if (errno == EINVAL)
4424 		SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)");
4425 	ASSERT_EQ(EPERM, errno);
4426 }
4427 
4428 TEST_F(O_SUSPEND_SECCOMP, seize)
4429 {
4430 	int ret;
4431 
4432 	ret = ptrace(PTRACE_SEIZE, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP);
4433 	ASSERT_EQ(-1, ret);
4434 	if (errno == EINVAL)
4435 		SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)");
4436 	ASSERT_EQ(EPERM, errno);
4437 }
4438 
4439 /*
4440  * get_nth - Get the nth space-separated entry in a file.
4441  *
4442  * Returns the length of the read field.
4443  * Throws an error if the field is zero-length.
4444  */
4445 static ssize_t get_nth(struct __test_metadata *_metadata, const char *path,
4446 		     const unsigned int position, char **entry)
4447 {
4448 	char *line = NULL;
4449 	unsigned int i;
4450 	ssize_t nread;
4451 	size_t len = 0;
4452 	FILE *f;
4453 
4454 	f = fopen(path, "r");
4455 	ASSERT_NE(f, NULL) {
4456 		TH_LOG("Could not open %s: %s", path, strerror(errno));
4457 	}
4458 
4459 	for (i = 0; i < position; i++) {
4460 		nread = getdelim(&line, &len, ' ', f);
4461 		ASSERT_GE(nread, 0) {
4462 			TH_LOG("Failed to read entry %d in file %s", i, path);
4463 		}
4464 	}
4465 	fclose(f);
4466 
4467 	ASSERT_GT(nread, 0) {
4468 		TH_LOG("Entry in file %s had zero length", path);
4469 	}
4470 
4471 	*entry = line;
4472 	return nread - 1;
4473 }
4474 
4475 /* For a given PID, get the task state (D, R, etc...) */
4476 static char get_proc_stat(struct __test_metadata *_metadata, pid_t pid)
4477 {
4478 	char proc_path[100] = {0};
4479 	char status;
4480 	char *line;
4481 
4482 	snprintf(proc_path, sizeof(proc_path), "/proc/%d/stat", pid);
4483 	ASSERT_EQ(get_nth(_metadata, proc_path, 3, &line), 1);
4484 
4485 	status = *line;
4486 	free(line);
4487 
4488 	return status;
4489 }
4490 
4491 /*
4492  * b/147676645
4493  * SECCOMP_IOCTL_NOTIF_RECV not compatible < 5.0
4494  * SECCOMP_IOCTL_NOTIF_SEND not compatible < 5.0
4495  * SECCOMP_FILTER_FLAG_NEW_LISTENER not compatible < 5.0
4496  */
4497 #ifndef __ANDROID__
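/*
 * Check that queued notifications are delivered in FIFO order: the ids are
 * sequential and SECCOMP_IOCTL_NOTIF_RECV returns them in ascending order.
 */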
4498 TEST(user_notification_fifo)
4499 {
4500 	struct seccomp_notif_resp resp = {};
4501 	struct seccomp_notif req = {};
4502 	int i, status, listener;
4503 	pid_t pid, pids[3];
4504 	__u64 baseid;
4505 	long ret;
4506 	/* 100 ms */
4507 	struct timespec delay = { .tv_nsec = 100000000 };
4508 
4509 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
4510 	ASSERT_EQ(0, ret) {
4511 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
4512 	}
4513 
4514 	/* Setup a listener */
4515 	listener = user_notif_syscall(__NR_getppid,
4516 				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
4517 	ASSERT_GE(listener, 0);
4518 
4519 	pid = fork();
4520 	ASSERT_GE(pid, 0);
4521 
4522 	if (pid == 0) {
4523 		ret = syscall(__NR_getppid);
4524 		exit(ret != USER_NOTIF_MAGIC);
4525 	}
4526 
4527 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
4528 	baseid = req.id + 1;
4529 
4530 	resp.id = req.id;
4531 	resp.error = 0;
4532 	resp.val = USER_NOTIF_MAGIC;
4533 
4534 	/* Respond to the first notification */
4535 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
4536 
4537 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
4538 	EXPECT_EQ(true, WIFEXITED(status));
4539 	EXPECT_EQ(0, WEXITSTATUS(status));
4540 
4541 	/* Start children, and generate notifications */
4542 	for (i = 0; i < ARRAY_SIZE(pids); i++) {
4543 		pid = fork();
4544 		if (pid == 0) {
4545 			ret = syscall(__NR_getppid);
4546 			exit(ret != USER_NOTIF_MAGIC);
4547 		}
4548 		pids[i] = pid;
4549 	}
4550 
4551 	/* This spins until all of the children are sleeping */
4552 restart_wait:
4553 	for (i = 0; i < ARRAY_SIZE(pids); i++) {
4554 		if (get_proc_stat(_metadata, pids[i]) != 'S') {
4555 			nanosleep(&delay, NULL);
4556 			goto restart_wait;
4557 		}
4558 	}
4559 
4560 	/* Read the notifications in order (and respond) */
4561 	for (i = 0; i < ARRAY_SIZE(pids); i++) {
4562 		memset(&req, 0, sizeof(req));
4563 		EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
4564 		EXPECT_EQ(req.id, baseid + i);
4565 		resp.id = req.id;
4566 		EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
4567 	}
4568 
4569 	/* Make sure notifications were received */
4570 	for (i = 0; i < ARRAY_SIZE(pids); i++) {
4571 		EXPECT_EQ(waitpid(pids[i], &status, 0), pids[i]);
4572 		EXPECT_EQ(true, WIFEXITED(status));
4573 		EXPECT_EQ(0, WEXITSTATUS(status));
4574 	}
4575 }
4576 #endif
4577 
4578 /* get_proc_syscall - Get the syscall in progress for a given pid
4579  *
4580  * Returns the current syscall number for a given process
4581  * Returns -1 if not in syscall (running or blocked)
4582  */
4583 static long get_proc_syscall(struct __test_metadata *_metadata, int pid)
4584 {
4585 	char proc_path[100] = {0};
4586 	long ret = -1;
4587 	ssize_t nread;
4588 	char *line;
4589 
4590 	snprintf(proc_path, sizeof(proc_path), "/proc/%d/syscall", pid);
4591 	nread = get_nth(_metadata, proc_path, 1, &line);
4592 	ASSERT_GT(nread, 0);
4593 
4594 	if (strncmp("running", line, MIN(7, nread)))
4595 		ret = strtol(line, NULL, 16);
4596 
4597 	free(line);
4598 	return ret;
4599 }
4600 
4601 /*
4602  * b/147676645
4603  * SECCOMP_FILTER_FLAG_NEW_LISTENER not compatible < 5.0
4604  * SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV not compatible < 5.19
4605  */
4606 #ifndef __ANDROID__
4607 /* Ensure non-fatal signals prior to receive are unmodified */
4608 TEST(user_notification_wait_killable_pre_notification)
4609 {
4610 	struct sigaction new_action = {
4611 		.sa_handler = signal_handler,
4612 	};
4613 	int listener, status, sk_pair[2];
4614 	pid_t pid;
4615 	long ret;
4616 	char c;
4617 	/* 100 ms */
4618 	struct timespec delay = { .tv_nsec = 100000000 };
4619 
4620 	ASSERT_EQ(sigemptyset(&new_action.sa_mask), 0);
4621 
4622 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
4623 	ASSERT_EQ(0, ret)
4624 	{
4625 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
4626 	}
4627 
4628 	ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
4629 
4630 	listener = user_notif_syscall(
4631 		__NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER |
4632 				      SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV);
4633 	ASSERT_GE(listener, 0);
4634 
4635 	/*
4636 	 * Check that we can interrupt the process with SIGUSR1 prior to
4637 	 * receiving the notification. SIGUSR1 is wired up to a custom signal
4638 	 * handler; make sure it gets called.
4639 	 */
4640 	pid = fork();
4641 	ASSERT_GE(pid, 0);
4642 
4643 	if (pid == 0) {
4644 		close(sk_pair[0]);
4645 		handled = sk_pair[1];
4646 
4647 		/* Setup the non-fatal sigaction without SA_RESTART */
4648 		if (sigaction(SIGUSR1, &new_action, NULL)) {
4649 			perror("sigaction");
4650 			exit(1);
4651 		}
4652 
4653 		ret = syscall(__NR_getppid);
4654 		/* Make sure we got a return from a signal interruption */
4655 		exit(ret != -1 || errno != EINTR);
4656 	}
4657 
4658 	/*
4659 	 * Make sure we've gotten to the seccomp user notification wait
4660 	 * from getppid prior to sending any signals
4661 	 */
4662 	while (get_proc_syscall(_metadata, pid) != __NR_getppid &&
4663 	       get_proc_stat(_metadata, pid) != 'S')
4664 		nanosleep(&delay, NULL);
4665 
4666 	/* Send non-fatal kill signal */
4667 	EXPECT_EQ(kill(pid, SIGUSR1), 0);
4668 
4669 	/* wait for process to exit (exit checks for EINTR) */
4670 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
4671 	EXPECT_EQ(true, WIFEXITED(status));
4672 	EXPECT_EQ(0, WEXITSTATUS(status));
4673 
4674 	EXPECT_EQ(read(sk_pair[0], &c, 1), 1);
4675 }
4676 #endif
4677 
4678 /*
4679  * b/147676645
4680  * SECCOMP_IOCTL_NOTIF_RECV not compatible < 5.0
4681  * SECCOMP_IOCTL_NOTIF_SEND not compatible < 5.0
4682  * SECCOMP_FILTER_FLAG_NEW_LISTENER not compatible < 5.0
4683  * SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV not compatible < 5.19
4684  */
4685 #ifndef __ANDROID__
4686 /* Ensure non-fatal signals after receive are blocked */
4687 TEST(user_notification_wait_killable)
4688 {
4689 	struct sigaction new_action = {
4690 		.sa_handler = signal_handler,
4691 	};
4692 	struct seccomp_notif_resp resp = {};
4693 	struct seccomp_notif req = {};
4694 	int listener, status, sk_pair[2];
4695 	pid_t pid;
4696 	long ret;
4697 	char c;
4698 	/* 100 ms */
4699 	struct timespec delay = { .tv_nsec = 100000000 };
4700 
4701 	ASSERT_EQ(sigemptyset(&new_action.sa_mask), 0);
4702 
4703 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
4704 	ASSERT_EQ(0, ret)
4705 	{
4706 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
4707 	}
4708 
4709 	ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
4710 
4711 	listener = user_notif_syscall(
4712 		__NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER |
4713 				      SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV);
4714 	ASSERT_GE(listener, 0);
4715 
4716 	pid = fork();
4717 	ASSERT_GE(pid, 0);
4718 
4719 	if (pid == 0) {
4720 		close(sk_pair[0]);
4721 		handled = sk_pair[1];
4722 
4723 		/* Setup the sigaction without SA_RESTART */
4724 		if (sigaction(SIGUSR1, &new_action, NULL)) {
4725 			perror("sigaction");
4726 			exit(1);
4727 		}
4728 
4729 		/* Make sure that the syscall is completed (no EINTR) */
4730 		ret = syscall(__NR_getppid);
4731 		exit(ret != USER_NOTIF_MAGIC);
4732 	}
4733 
4734 	/*
4735 	 * Get the notification, to move the notifying process into a
4736 	 * non-preemptible (TASK_KILLABLE) state.
4737 	 */
4738 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
4739 	/* Send non-fatal kill signal */
4740 	EXPECT_EQ(kill(pid, SIGUSR1), 0);
4741 
4742 	/*
4743 	 * Make sure the task moves to TASK_KILLABLE by waiting for the
4744 	 * D (Disk Sleep) state after receiving the non-fatal signal.
4745 	 */
4746 	while (get_proc_stat(_metadata, pid) != 'D')
4747 		nanosleep(&delay, NULL);
4748 
4749 	resp.id = req.id;
4750 	resp.val = USER_NOTIF_MAGIC;
4751 	/* Make sure the notification is found and able to be replied to */
4752 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
4753 
4754 	/*
4755 	 * Make sure that the signal handler does get called once we're back in
4756 	 * userspace.
4757 	 */
4758 	EXPECT_EQ(read(sk_pair[0], &c, 1), 1);
4759 	/* wait for process to exit (exit checks for USER_NOTIF_MAGIC) */
4760 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
4761 	EXPECT_EQ(true, WIFEXITED(status));
4762 	EXPECT_EQ(0, WEXITSTATUS(status));
4763 }
4764 #endif
4765 
4766 /*
4767  * b/147676645
4768  * SECCOMP_IOCTL_NOTIF_RECV not compatible < 5.0
4769  * SECCOMP_FILTER_FLAG_NEW_LISTENER not compatible < 5.0
4770  * SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV not compatible < 5.19
4771  */
4772 #ifndef __ANDROID__
4773 /* Ensure fatal signals after receive are not blocked */
4774 TEST(user_notification_wait_killable_fatal)
4775 {
4776 	struct seccomp_notif req = {};
4777 	int listener, status;
4778 	pid_t pid;
4779 	long ret;
4780 	/* 100 ms */
4781 	struct timespec delay = { .tv_nsec = 100000000 };
4782 
4783 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
4784 	ASSERT_EQ(0, ret)
4785 	{
4786 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
4787 	}
4788 
4789 	listener = user_notif_syscall(
4790 		__NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER |
4791 				      SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV);
4792 	ASSERT_GE(listener, 0);
4793 
4794 	pid = fork();
4795 	ASSERT_GE(pid, 0);
4796 
4797 	if (pid == 0) {
4798 		/* This should never complete as it should get a SIGTERM */
4799 		syscall(__NR_getppid);
4800 		exit(1);
4801 	}
4802 
4803 	while (get_proc_stat(_metadata, pid) != 'S')
4804 		nanosleep(&delay, NULL);
4805 
4806 	/*
4807 	 * Get the notification, to move the notifying process into a
4808 	 * non-preemptible (TASK_KILLABLE) state.
4809 	 */
4810 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
4811 	/* Kill the process with a fatal signal */
4812 	EXPECT_EQ(kill(pid, SIGTERM), 0);
4813 
4814 	/*
4815 	 * Wait for the process to exit, and make sure the process terminated
4816 	 * due to the SIGTERM signal.
4817 	 */
4818 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
4819 	EXPECT_EQ(true, WIFSIGNALED(status));
4820 	EXPECT_EQ(SIGTERM, WTERMSIG(status));
4821 }
4822 #endif
4823 
4824 /*
4825  * TODO:
4826  * - expand NNP testing
4827  * - better arch-specific TRACE and TRAP handlers.
4828  * - endianness checking when appropriate
4829  * - 64-bit arg prodding
4830  * - arch value testing (x86 modes especially)
4831  * - verify that FILTER_FLAG_LOG filters generate log messages
4832  * - verify that RET_LOG generates log messages
4833  */
4834 
4835 TEST_HARNESS_MAIN
4836