1 /* Copyright 2018 The ChromiumOS Authors
2 * Use of this source code is governed by a BSD-style license that can be
3 * found in the LICENSE file.
4 */
5
6 #include <dlfcn.h>
7 #include <err.h>
8 #include <errno.h>
9 #include <fcntl.h>
10 #include <getopt.h>
11 #include <inttypes.h>
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <sys/capability.h>
17 #include <sys/mount.h>
18 #include <sys/stat.h>
19 #include <sys/types.h>
20 #include <sys/vfs.h>
21 #include <unistd.h>
22
23 #include <linux/filter.h>
24
25 #include "libminijail.h"
26 #include "libsyscalls.h"
27
28 #include "config_parser.h"
29 #include "elfparse.h"
30 #include "minijail0_cli.h"
31 #include "system.h"
32 #include "util.h"
33
34 #define IDMAP_LEN 32U
35 #define DEFAULT_TMP_SIZE (64 * 1024 * 1024)
36
/*
 * Allocate |size| bytes with malloc() and never return NULL: if the
 * allocation fails the process is terminated through err(3) with exit
 * status 1.  This wrapper exists only in the CLI; the library is expected to
 * hand ENOMEM back to its callers instead of aborting.
 */
static void *xmalloc(size_t size)
{
	void *mem = malloc(size);

	if (mem == NULL)
		err(1, "malloc() failed");
	return mem;
}
48
/*
 * Duplicate the NUL-terminated string |s| with strdup().  If the copy cannot
 * be allocated the process is terminated through err(3) with exit status 1,
 * so the result is never NULL.  The returned buffer is heap allocated and is
 * released with free().
 */
static char *xstrdup(const char *s)
{
	char *copy = strdup(s);

	if (copy == NULL)
		err(1, "strdup() failed");
	return copy;
}
56
/*
 * Handle -u: configure |j| to run as the user given by |arg|.
 *
 * |arg| is first tried as a decimal uid.  A numeric uid is stored in
 * |*out_uid| and handed to minijail_change_uid(); |*out_gid| is left untouched
 * in that case.  Anything else is treated as a user name: lookup_user() fills
 * in |*out_uid| and |*out_gid|, then minijail_change_user() is called with
 * the name.
 *
 * Exits with status 1 if the number does not fit in uid_t, if the lookup
 * fails, or if minijail_change_user() reports an error.
 */
static void set_user(struct minijail *j, const char *arg, uid_t *out_uid,
		     gid_t *out_gid)
{
	char *end = NULL;

	errno = 0;
	unsigned long parsed = strtoul(arg, &end, 10);
	if (!*end && *arg) {
		/*
		 * strtoul() can return values wider than uid_t and wraps
		 * negative input, so where unsigned long is wider than uid_t
		 * a plain assignment would turn e.g. "4294967296" into uid 0.
		 * Refuse anything that does not survive the round trip.
		 */
		uid_t uid = (uid_t)parsed;
		if (errno == ERANGE || (unsigned long)uid != parsed)
			errx(1, "uid '%s' is out of range", arg);
		*out_uid = uid;
		minijail_change_uid(j, uid);
		return;
	}

	/* Both helpers report failure as a negated errno value. */
	int ret = lookup_user(arg, out_uid, out_gid);
	if (ret) {
		errno = -ret;
		err(1, "Bad user '%s'", arg);
	}

	ret = minijail_change_user(j, arg);
	if (ret) {
		errno = -ret;
		err(1, "minijail_change_user('%s') failed", arg);
	}
}
80
/*
 * Handle -g: configure |j| to run with the group given by |arg|.
 *
 * |arg| is first tried as a decimal gid; otherwise it is resolved as a group
 * name through lookup_group().  Either way the resulting gid is stored in
 * |*out_gid| and passed to minijail_change_gid().
 *
 * Exits with status 1 if the number does not fit in gid_t or if the group
 * name cannot be resolved.
 */
static void set_group(struct minijail *j, const char *arg, gid_t *out_gid)
{
	char *end = NULL;

	errno = 0;
	unsigned long parsed = strtoul(arg, &end, 10);
	if (!*end && *arg) {
		/*
		 * strtoul() can return values wider than gid_t and wraps
		 * negative input; reject anything that would be silently
		 * truncated into a different (possibly privileged) gid.
		 */
		gid_t gid = (gid_t)parsed;
		if (errno == ERANGE || (unsigned long)gid != parsed)
			errx(1, "gid '%s' is out of range", arg);
		*out_gid = gid;
		minijail_change_gid(j, gid);
		return;
	}

	/* lookup_group() reports failure as a negated errno value. */
	int ret = lookup_group(arg, out_gid);
	if (ret) {
		errno = -ret;
		err(1, "Bad group '%s'", arg);
	}

	minijail_change_gid(j, *out_gid);
}
99
/*
 * Helper used by --add-suppl-group (possibly more than once) to build the
 * supplementary gids array.
 *
 * |arg| is either a decimal gid or a group name resolved with
 * lookup_group().  The resulting gid is appended to the heap array
 * |*suppl_gids|, which is grown with realloc(), and |*suppl_gids_count| is
 * incremented.  Both outputs are only updated once the new slot exists.
 *
 * Exits with status 1 if the gid is out of range, the group is unknown, or
 * the array cannot be grown.
 */
static void suppl_group_add(size_t *suppl_gids_count, gid_t **suppl_gids,
			    char *arg)
{
	char *end = NULL;
	gid_t gid;

	errno = 0;
	unsigned long parsed = strtoul(arg, &end, 10);
	if (!*end && *arg) {
		/*
		 * A gid number has been specified.  Make sure it was not
		 * truncated when narrowed from unsigned long to gid_t.
		 */
		gid = (gid_t)parsed;
		if (errno == ERANGE || (unsigned long)gid != parsed)
			errx(1, "gid '%s' is out of range", arg);
	} else {
		int ret = lookup_group(arg, &gid);
		if (ret) {
			/*
			 * A group name has been specified,
			 * but doesn't exist: we bail out.
			 */
			errno = -ret;
			err(1, "Bad group '%s'", arg);
		}
	}

	/*
	 * From here, gid is guaranteed to be set and valid,
	 * we add it to our supplementary gids array.  Check the pointer
	 * returned by realloc(), not the (never NULL) out-parameter.
	 */
	size_t new_count = *suppl_gids_count + 1;
	gid_t *grown = realloc(*suppl_gids, sizeof(gid_t) * new_count);
	if (!grown)
		err(1, "failed to allocate memory");

	grown[new_count - 1] = gid;
	*suppl_gids = grown;
	*suppl_gids_count = new_count;
}
132
/*
 * Handle -B: parse |arg| as a hexadecimal bit mask and tell |j| not to set
 * those securebits.  Exits with status 1 if |arg| has trailing characters
 * that are not part of a hexadecimal number.
 */
static void skip_securebits(struct minijail *j, const char *arg)
{
	char *rest = NULL;
	const uint64_t mask = strtoull(arg, &rest, 16);

	if (*rest != '\0')
		errx(1, "Invalid securebit mask: '%s'", arg);
	minijail_skip_setting_securebits(j, mask);
}
142
use_caps(struct minijail * j,const char * arg)143 static void use_caps(struct minijail *j, const char *arg)
144 {
145 uint64_t caps = 0;
146 cap_t parsed_caps = cap_from_text(arg);
147
148 if (parsed_caps != NULL) {
149 unsigned int i;
150 const uint64_t one = 1;
151 cap_flag_value_t cap_value;
152 unsigned int last_valid_cap = get_last_valid_cap();
153
154 for (i = 0; i <= last_valid_cap; ++i) {
155 if (cap_get_flag(parsed_caps, i, CAP_EFFECTIVE,
156 &cap_value)) {
157 if (errno == EINVAL) {
158 /*
159 * Some versions of libcap reject any
160 * capabilities they were not compiled
161 * with by returning EINVAL.
162 */
163 continue;
164 }
165 err(1,
166 "Could not get the value of the %d-th "
167 "capability",
168 i);
169 }
170 if (cap_value == CAP_SET)
171 caps |= (one << i);
172 }
173 cap_free(parsed_caps);
174 } else {
175 char *end = NULL;
176 caps = strtoull(arg, &end, 16);
177 if (*end)
178 errx(1, "Invalid cap set: '%s'", arg);
179 }
180
181 minijail_use_caps(j, caps);
182 }
183
/*
 * Handle -b / --bind-mount: |arg| has the form <src[,dst[,writable]]> and is
 * split in place with tokenize().
 *
 * An empty or missing <dst> defaults to <src>.  <writable> may be empty,
 * missing or "0" (read-only) or "1" (writable).  The binding is registered
 * with minijail_bind().
 *
 * Exits with status 1 if <src> is missing/empty, if there are more than three
 * fields, if <writable> has another value, or if minijail_bind() fails.
 */
static void add_binding(struct minijail *j, char *arg)
{
	char *fields[3];

	/* Fields are consumed strictly left to right: src, dest, flags. */
	for (size_t i = 0; i < 3; ++i)
		fields[i] = tokenize(&arg, ",");

	char *src = fields[0];
	char *dest = fields[1];
	char *flags = fields[2];

	/* |arg| is non-NULL only when a fourth field was supplied. */
	if (arg != NULL || src == NULL || *src == '\0')
		errx(1, "Bad binding: %s %s", src, dest);

	if (dest == NULL || *dest == '\0')
		dest = src;

	int writable = 0;
	if (flags != NULL && *flags != '\0' && !streq(flags, "0")) {
		if (!streq(flags, "1"))
			errx(1, "Bad value for <writable>: %s", flags);
		writable = 1;
	}

	if (minijail_bind(j, src, dest, writable) != 0)
		errx(1, "minijail_bind failed");
}
203
/*
 * Handle -R: |arg| has the form <type,cur,max> and is split in place with
 * tokenize().
 *
 * <cur> and <max> are either the word "unlimited" (RLIM_INFINITY) or a number
 * accepted by strtoul() with base auto-detection.  <type> is resolved with
 * parse_single_constant().  The limit is registered with minijail_rlimit().
 *
 * Exits with status 1 if any field is missing or empty, if extra fields are
 * present, if a limit or the type cannot be parsed, or if minijail_rlimit()
 * fails.
 */
static void add_rlimit(struct minijail *j, char *arg)
{
	char *type = tokenize(&arg, ",");
	char *cur = tokenize(&arg, ",");
	char *max = tokenize(&arg, ",");
	char *end;
	if (!type || type[0] == '\0' || !cur || cur[0] == '\0' || !max ||
	    max[0] == '\0' || arg != NULL) {
		/*
		 * tokenize() has already advanced |arg|: it is NULL once the
		 * input is exhausted, otherwise it points at the surplus
		 * fields.  Report the fields we actually parsed instead, and
		 * never hand a NULL pointer to "%s".
		 */
		errx(1, "Bad rlimit '%s,%s,%s%s' (expected <type,cur,max>)",
		     type ? type : "", cur ? cur : "", max ? max : "",
		     arg ? ",..." : "");
	}
	rlim_t cur_rlim;
	rlim_t max_rlim;
	if (streq(cur, "unlimited")) {
		cur_rlim = RLIM_INFINITY;
	} else {
		end = NULL;
		cur_rlim = strtoul(cur, &end, 0);
		if (*end)
			errx(1, "Bad soft limit: '%s'", cur);
	}
	if (streq(max, "unlimited")) {
		max_rlim = RLIM_INFINITY;
	} else {
		end = NULL;
		max_rlim = strtoul(max, &end, 0);
		if (*end)
			errx(1, "Bad hard limit: '%s'", max);
	}

	/* |end| left equal to |type| means no constant was recognized. */
	end = NULL;
	int resource = parse_single_constant(type, &end);
	if (type == end)
		errx(1, "Bad rlimit: '%s'", type);

	if (minijail_rlimit(j, resource, cur_rlim, max_rlim))
		errx(1, "minijail_rlimit '%s,%s,%s' failed", type, cur, max);
}
241
/*
 * Handle -k / --mount: |arg| has the form <src,dst,fstype[,flags[,data]]> and
 * is split in place with tokenize().
 *
 * <flags>, when present and non-empty, is resolved with parse_constant();
 * otherwise the mount flags are 0.  The mount is registered with
 * minijail_mount_with_data().
 *
 * Exits with status 1 if <src>, <dst> or <fstype> is missing or empty, if
 * <flags> cannot be parsed, or if minijail_mount_with_data() fails.
 */
static void add_mount(struct minijail *j, char *arg)
{
	char *src = tokenize(&arg, ",");
	char *dest = tokenize(&arg, ",");
	char *type = tokenize(&arg, ",");
	char *flags = tokenize(&arg, ",");
	char *data = tokenize(&arg, ",");

	const int have_src = src != NULL && src[0] != '\0';
	const int have_dest = dest != NULL && dest[0] != '\0';
	const int have_type = type != NULL && type[0] != '\0';
	if (!have_src || !have_dest || !have_type)
		errx(1, "Bad mount: %s %s %s", src, dest, type);

	/*
	 * The data option is itself comma delimited, so the last tokenize()
	 * call may have cut it short.  When nothing was left, |arg| is NULL.
	 * Otherwise |arg| points just past the NUL byte that replaced a comma
	 * inside the data string; putting the comma back re-joins the pieces.
	 * Example:
	 *   none,/tmp,tmpfs,0xe,mode=0755,uid=10,gid=10
	 * is tokenized into
	 *   none\0/tmp\0tmpfs\00xe\0mode=0755\0uid=10,gid=10
	 * with |data| at "mode=0755" and |arg| at "uid=10,gid=10".
	 */
	if (arg != NULL)
		arg[-1] = ',';

	unsigned long mountflags = 0;
	if (flags != NULL && flags[0] != '\0') {
		char *end = NULL;

		mountflags = parse_constant(flags, &end);
		/* |end| left at the start means nothing was recognized. */
		if (end == flags)
			errx(1, "Bad mount flags: %s", flags);
	}

	if (minijail_mount_with_data(j, src, dest, type, mountflags, data) !=
	    0)
		errx(1, "minijail_mount failed");
}
282
/*
 * Build a single-entry id map string of the form "<id> <lowerid> 1".
 *
 * The string is written into a freshly allocated buffer of IDMAP_LEN bytes;
 * the caller releases it with free().  Exits with status 1 if the formatted
 * text does not fit.
 */
static char *build_idmap(id_t id, id_t lowerid)
{
	int ret;
	char *idmap = xmalloc(IDMAP_LEN);
	/*
	 * Ids are formatted as unsigned values: printing them with "%d"
	 * would render any id above INT_MAX as a negative number.
	 */
	ret = snprintf(idmap, IDMAP_LEN, "%ju %ju 1", (uintmax_t)id,
		       (uintmax_t)lowerid);
	if (ret < 0 || (size_t)ret >= IDMAP_LEN) {
		free(idmap);
		errx(1, "Could not build id map");
	}
	return idmap;
}
294
has_cap_setgid(void)295 static int has_cap_setgid(void)
296 {
297 cap_t caps;
298 cap_flag_value_t cap_value;
299
300 if (!CAP_IS_SUPPORTED(CAP_SETGID))
301 return 0;
302
303 caps = cap_get_proc();
304 if (!caps)
305 err(1, "Could not get process' capabilities");
306
307 if (cap_get_flag(caps, CAP_SETGID, CAP_EFFECTIVE, &cap_value))
308 err(1, "Could not get the value of CAP_SETGID");
309
310 if (cap_free(caps))
311 err(1, "Could not free capabilities");
312
313 return cap_value == CAP_SET;
314 }
315
/*
 * Apply the uid and/or gid maps requested on the command line to |j|.
 *
 * For each map that is requested (|set_uidmap| / |set_gidmap| non-zero) the
 * jail is switched to new user and pid namespaces.  If no explicit map string
 * was given, a one-entry map from the caller's current id to |uid| / |gid| is
 * built with build_idmap().  The map string is free()d once it has been
 * handed to the library, so a caller-supplied |uidmap| / |gidmap| must be
 * heap allocated.
 *
 * Exits with status 1 if the library rejects a map.
 */
static void set_ugid_mapping(struct minijail *j, int set_uidmap, uid_t uid,
			     char *uidmap, int set_gidmap, gid_t gid,
			     char *gidmap)
{
	if (set_uidmap) {
		minijail_namespace_user(j);
		minijail_namespace_pids(j);

		/*
		 * Without an explicit map, map the current uid to the chosen
		 * uid in the target namespace (or root, if none was chosen).
		 */
		if (uidmap == NULL)
			uidmap = build_idmap(uid, getuid());

		if (minijail_uidmap(j, uidmap) != 0)
			errx(1, "Could not set uid map");
		free(uidmap);
	}

	if (!set_gidmap)
		return;

	minijail_namespace_user(j);
	minijail_namespace_pids(j);

	/*
	 * Without an explicit map, map the current gid to the chosen gid in
	 * the target namespace.
	 */
	if (gidmap == NULL)
		gidmap = build_idmap(gid, getgid());

	/*
	 * Lacking CAP_SETGID means that we are not running as root, so
	 * setgroups(2) also has to be disabled to be able to set the gid map.
	 * See http://man7.org/linux/man-pages/man7/user_namespaces.7.html
	 */
	if (!has_cap_setgid())
		minijail_namespace_user_disable_setgroups(j);

	if (minijail_gidmap(j, gidmap) != 0)
		errx(1, "Could not set gid map");
	free(gidmap);
}
362
/*
 * Handle -C: make |j| chroot to |path| and record that in |*chroot|.
 * Exits with status 1 if -P was already given (|pivot_root| non-zero) or if
 * minijail_enter_chroot() fails.
 */
static void use_chroot(struct minijail *j, const char *path, int *chroot,
		       int pivot_root)
{
	if (pivot_root != 0)
		errx(1, "Could not set chroot because -P was specified");

	if (minijail_enter_chroot(j, path) != 0)
		errx(1, "Could not set chroot");

	*chroot = 1;
}
372
/*
 * Handle -P: make |j| pivot_root to |path| inside a new mount namespace and
 * record that in |*pivot_root|.  Exits with status 1 if -C was already given
 * (|chroot| non-zero) or if minijail_enter_pivot_root() fails.
 */
static void use_pivot_root(struct minijail *j, const char *path,
			   int *pivot_root, int chroot)
{
	if (chroot != 0)
		errx(1, "Could not set pivot_root because -C was specified");

	if (minijail_enter_pivot_root(j, path) != 0)
		errx(1, "Could not set pivot_root");

	minijail_namespace_vfs(j);
	*pivot_root = 1;
}
383
use_profile(struct minijail * j,const char * profile,int * pivot_root,int chroot,size_t * tmp_size)384 static void use_profile(struct minijail *j, const char *profile,
385 int *pivot_root, int chroot, size_t *tmp_size)
386 {
387 /* Note: New profiles should be added in minijail0_cli_unittest.cc. */
388
389 if (streq(profile, "minimalistic-mountns") ||
390 streq(profile, "minimalistic-mountns-nodev")) {
391 minijail_namespace_vfs(j);
392 if (minijail_bind(j, "/", "/", 0))
393 errx(1, "minijail_bind(/) failed");
394 if (minijail_bind(j, "/proc", "/proc", 0))
395 errx(1, "minijail_bind(/proc) failed");
396 if (streq(profile, "minimalistic-mountns")) {
397 if (minijail_bind(j, "/dev/log", "/dev/log", 0))
398 errx(1, "minijail_bind(/dev/log) failed");
399 minijail_mount_dev(j);
400 }
401 if (!*tmp_size) {
402 /* Avoid clobbering |tmp_size| if it was already set. */
403 *tmp_size = DEFAULT_TMP_SIZE;
404 }
405 minijail_remount_proc_readonly(j);
406 minijail_set_using_minimalistic_mountns(j);
407 use_pivot_root(j, DEFAULT_PIVOT_ROOT, pivot_root, chroot);
408 } else
409 errx(1, "Unrecognized profile name '%s'", profile);
410 }
411
/*
 * Translate the textual mount propagation |mode| ("shared", "private",
 * "slave" or "unbindable") into the matching MS_* flag and pass it to
 * minijail_remount_mode().  Exits with status 1 on any other string.
 */
static void set_remount_mode(struct minijail *j, const char *mode)
{
	static const struct {
		const char *name;
		unsigned long flag;
	} known_modes[] = {
	    {"shared", MS_SHARED},
	    {"private", MS_PRIVATE},
	    {"slave", MS_SLAVE},
	    {"unbindable", MS_UNBINDABLE},
	};
	const size_t count = sizeof(known_modes) / sizeof(known_modes[0]);

	for (size_t i = 0; i < count; ++i) {
		if (streq(mode, known_modes[i].name)) {
			minijail_remount_mode(j, known_modes[i].flag);
			return;
		}
	}
	errx(1, "Unknown remount mode: '%s'", mode);
}
427
read_seccomp_filter(const char * filter_path,struct sock_fprog * filter)428 static void read_seccomp_filter(const char *filter_path,
429 struct sock_fprog *filter)
430 {
431 attribute_cleanup_fp FILE *f = fopen(filter_path, "re");
432 if (!f)
433 err(1, "failed to open %s", filter_path);
434 off_t filter_size = 0;
435 if (fseeko(f, 0, SEEK_END) == -1 || (filter_size = ftello(f)) == -1)
436 err(1, "failed to get file size of %s", filter_path);
437 if (filter_size % sizeof(struct sock_filter) != 0) {
438 errx(1,
439 "filter size (%" PRId64 ") of %s is not a multiple of"
440 " %zu",
441 filter_size, filter_path, sizeof(struct sock_filter));
442 }
443 rewind(f);
444
445 filter->len = filter_size / sizeof(struct sock_filter);
446 filter->filter = xmalloc(filter_size);
447 if (fread(filter->filter, sizeof(struct sock_filter), filter->len, f) !=
448 filter->len) {
449 err(1, "failed read %s", filter_path);
450 }
451 }
452
453 /*
454 * Long options use values starting at 0x100 so that they're out of range of
455 * bytes which is how command line options are processed. Practically speaking,
456 * we could get by with the (7-bit) ASCII range, but UTF-8 codepoints would be a
457 * bit confusing, and honestly there's no reason to "optimize" here.
458 *
459 * The long enum values are internal to this file and can freely change at any
460 * time without breaking anything. Please keep alphabetically ordered.
461 */
enum {
	/*
	 * Every constant after this marker belongs to an option that has no
	 * short form.  The list is kept in alphabetical order; the numeric
	 * values are internal to this file.
	 */
	LONG_OPTION_BASE = 0x100,
	OPT_ADD_SUPPL_GROUP,
	OPT_ALLOW_SPECULATIVE_EXECUTION,
	OPT_AMBIENT,
	OPT_CONFIG,
	OPT_ENV_ADD,
	OPT_ENV_RESET,
	OPT_FS_DEFAULT_PATHS,
	OPT_FS_PATH_ADVANCED_RW,
	OPT_FS_PATH_RO,
	OPT_FS_PATH_RW,
	OPT_FS_PATH_RX,
	OPT_LOGGING,
	OPT_PRELOAD_LIBRARY,
	OPT_PROFILE,
	OPT_SECCOMP_BPF_BINARY,
	OPT_UTS,
};
482
483 /*
484 * NB: When adding new options, prefer long-option only. Add a short option
485 * only if its meaning is intuitive/obvious at a glance.
486 *
487 * Keep this sorted.
488 */
/*
 * Short option specification handed to getopt_long().  The pieces below are
 * concatenated by the compiler into one string: the leading '+' asks for
 * parsing to stop at the first non-option argument, a single ':' marks a
 * required argument and '::' an optional one.
 */
static const char optstring[] =
    "+"
    /* Lower-case options. */
    "a:b:c:de::f:g:hik:lm::nprst::u:vwyz"
    /* Upper-case options. */
    "B:C:GHIK::LM::NP:R:S:T:UV:Y";
491
492 static const struct option long_options[] = {
493 {"help", no_argument, 0, 'h'},
494 {"mount-dev", no_argument, 0, 'd'},
495 {"ambient", no_argument, 0, OPT_AMBIENT},
496 {"uts", optional_argument, 0, OPT_UTS},
497 {"logging", required_argument, 0, OPT_LOGGING},
498 {"profile", required_argument, 0, OPT_PROFILE},
499 {"preload-library", required_argument, 0, OPT_PRELOAD_LIBRARY},
500 {"seccomp-bpf-binary", required_argument, 0, OPT_SECCOMP_BPF_BINARY},
501 {"add-suppl-group", required_argument, 0, OPT_ADD_SUPPL_GROUP},
502 {"allow-speculative-execution", no_argument, 0,
503 OPT_ALLOW_SPECULATIVE_EXECUTION},
504 {"config", required_argument, 0, OPT_CONFIG},
505 {"env-add", required_argument, 0, OPT_ENV_ADD},
506 {"env-reset", no_argument, 0, OPT_ENV_RESET},
507 {"mount", required_argument, 0, 'k'},
508 {"bind-mount", required_argument, 0, 'b'},
509 {"ns-mount", no_argument, 0, 'v'},
510 {"fs-default-paths", no_argument, 0, OPT_FS_DEFAULT_PATHS},
511 {"fs-path-rx", required_argument, 0, OPT_FS_PATH_RX},
512 {"fs-path-ro", required_argument, 0, OPT_FS_PATH_RO},
513 {"fs-path-rw", required_argument, 0, OPT_FS_PATH_RW},
514 {"fs-path-advanced-rw", required_argument, 0, OPT_FS_PATH_ADVANCED_RW},
515 {0, 0, 0, 0},
516 };
517
518 /*
519 * Pull the usage string out into the top-level to help with long-lines. We
520 * want the output to be wrapped at 80 cols when it's shown to the user in the
521 * terminal, but we don't want the source wrapped to 80 cols because that will
522 * effectively make terminal output wrap to much lower levels (like <70).
523 */
524 /* clang-format off */
525 static const char help_text[] =
526 "Account (user/group) options:\n"
527 " -u <user> Change uid to <user>.\n"
528 " -g <group> Change gid to <group>.\n"
529 " -G Inherit supplementary groups from new uid.\n"
530 " Incompatible with -y or --add-suppl-group.\n"
531 " -y Keep original uid's supplementary groups.\n"
532 " Incompatible with -G or --add-suppl-group.\n"
533 " --add-suppl-group <group>\n"
534 " Add <group> to the proccess' supplementary groups.\n"
535 " Can be specified multiple times to add several groups.\n"
536 " Incompatible with -y or -G.\n"
537 "\n"
538 "Mount/path options:\n"
539 " -b <src[,dst[,writable]]>, --bind-mount <...>\n"
540 " Bind <src> to <dst>.\n"
541 " -k <src,dst,fstype[,flags[,data]]>, --mount <...>\n"
542 " Mount <src> at <dst>. <flags> and <data> can be specified as\n"
543 " in mount(2). Multiple instances allowed.\n"
544 " -K Do not change share mode of any existing mounts.\n"
545 " -K<mode> Mark all existing mounts as <mode> instead of MS_PRIVATE.\n"
546 " -r Remount /proc read-only (implies -v).\n"
547 " -d, --mount-dev\n"
548 " Create a new /dev with a minimal set of device nodes\n"
549 " (implies -v). See minijail0(1) for exact list.\n"
550 " -t[size] Mount tmpfs at /tmp (implies -v).\n"
551 " Optional argument specifies size (default \"64M\").\n"
552 " -C <dir> chroot(2) to <dir>. Incompatible with -P.\n"
553 " -P <dir> pivot_root(2) to <dir> (implies -v). Incompatible with -C.\n"
554 "\n"
555 "Namespace options:\n"
556 " -N Enter a new cgroup namespace.\n"
557 " -l Enter new IPC namespace.\n"
558 " -v, --ns-mount\n"
559 " Enter new mount namespace.\n"
560 " -V <file> Enter specified mount namespace.\n"
561 " -e[file] Enter new network namespace, or existing |file| if provided.\n"
562 " -p Enter new pid namespace (implies -vr).\n"
563 " -I Run as init (pid 1) inside a new pid namespace (implies -p).\n"
564 " -U Enter new user namespace (implies -p).\n"
565 " -m[<uid> <loweruid> <count>]\n"
566 " Set the uid map of a user namespace (implies -pU).\n"
567 " Same arguments as newuidmap(1); mappings are comma separated.\n"
568 " With no mapping, map the current uid to root.\n"
569 " Incompatible with -b without the 'writable' option.\n"
570 " -M[<gid> <lowergid> <count>]\n"
571 " Set the gid map of a user namespace (implies -pU).\n"
572 " Same arguments as newgidmap(1); mappings are comma separated.\n"
573 " With no mapping, map the current gid to root.\n"
574 " Incompatible with -b without the 'writable' option.\n"
575 " --uts[=name] Enter a new UTS namespace (and set hostname).\n"
576 "\n"
577 "Seccomp options:\n"
578 " -S <file> Set seccomp filter using <file>.\n"
579 " E.g., '-S /usr/share/filters/<prog>.$(uname -m)'.\n"
580 " Requires -n when not running as root.\n"
581 " --seccomp-bpf-binary=<f>\n"
582 " Set a pre-compiled seccomp filter using <f>.\n"
583 " E.g., '-S /usr/share/filters/<prog>.$(uname -m).bpf'.\n"
584 " Requires -n when not running as root.\n"
585 " The user is responsible for ensuring that the binary\n"
586 " was compiled for the correct architecture / kernel version.\n"
587 " -L Report blocked syscalls when using seccomp filter.\n"
588 " If the kernel does not support SECCOMP_RET_LOG, some syscalls\n"
589 " will automatically be allowed (see below).\n"
590 " -Y Synchronize seccomp filters across thread group.\n"
591 " -a <table> Use alternate syscall table <table>.\n"
592 " -s Use seccomp mode 1 (not the same as -S).\n"
593 "\n"
594 "Other options:\n"
595 " --config <file>\n"
596 " Load the Minijail configuration file <file>.\n"
597 " If used, must be specified ahead of other options.\n"
598 " --profile <p>\n"
599 " Configure minijail0 to run with the <p> sandboxing profile,\n"
600 " which is a convenient way to express multiple flags\n"
601 " that are typically used together.\n"
602 " See the minijail0(1) man page for the full list.\n"
603 " -n Set no_new_privs. See prctl(2) for details.\n"
604 " -c <caps> Restrict caps to <caps>.\n"
605 " --ambient Raise ambient capabilities. Requires -c.\n"
606 " -B <mask> Skip setting <mask> securebits when restricting caps (-c).\n"
607 " By default, SECURE_NOROOT, SECURE_NO_SETUID_FIXUP, and \n"
608 " SECURE_KEEP_CAPS (with their respective locks) are set.\n"
609 " -f <file> Write the pid of the jailed process to <file>.\n"
610 " -i Exit immediately after fork(2); i.e. background the program.\n"
611 " -z Don't forward signals to jailed process.\n"
612 " -R <type,cur,max>\n"
613 " Call setrlimit(3); can be specified multiple times.\n"
614 " -T <type> Assume <program> is a <type> ELF binary;\n"
615 " <type> may be 'static' or 'dynamic'.\n"
616 " This will avoid accessing <program> binary before execve(2).\n"
617 " Type 'static' will avoid preload hooking.\n"
618 " -w Create and join a new anonymous session keyring.\n"
619 " --env-reset Clear the current environment instead of having <program>\n"
620 " inherit the active environment. Often used to start <program>\n"
621 " with a minimal sanitized environment.\n"
622 " --env-add <NAME=value>\n"
623 " Sets the specified environment variable <NAME>\n"
624 " in the <program>'s environment before starting it.\n"
625 "\n"
626 "Uncommon options:\n"
627 " --allow-speculative-execution\n"
628 " Allow speculative execution by disabling mitigations.\n"
629 " --fs-default-paths\n"
630 " Adds a set of allowed paths to allow running common system \n"
631 " executables.\n"
632 " --fs-path-rx\n"
633 " Adds an allowed read-execute path.\n"
634 " --fs-path-ro\n"
635 " Adds an allowed read-only path.\n"
636 " --fs-path-rw\n"
637 " Adds an allowed read-write path.\n"
638 " --fs-path-advanced-rw\n"
639 " Adds an allowed advanced read-write path.\n"
640 " --preload-library=<file>\n"
641 " Overrides the path to \"" PRELOADPATH "\".\n"
642 " This is only really useful for local testing.\n"
643 " --logging=<output>\n"
644 " Set the logging system output: 'auto' (default),\n"
645 " 'syslog', or 'stderr'.\n"
646 " -h Help (this message).\n"
647 " -H Seccomp filter help message.\n";
648 /* clang-format on */
649
usage(const char * progn)650 static void usage(const char *progn)
651 {
652 printf("Usage: %s [options] [--] <program> [args...]\n\n%s", progn,
653 help_text);
654
655 printf("\nsyscalls allowed when logging (-L):\n ");
656 for (size_t i = 0; i < log_syscalls_len; ++i)
657 printf(" %s", log_syscalls[i]);
658 printf("\n");
659 }
660
seccomp_filter_usage(const char * progn)661 static void seccomp_filter_usage(const char *progn)
662 {
663 const struct syscall_entry *entry = syscall_table;
664 printf("Usage: %s -S <policy.file> <program> [args...]\n\n"
665 "System call names supported:\n",
666 progn);
667 for (; entry->name && entry->nr >= 0; ++entry)
668 printf(" %s [%d]\n", entry->name, entry->nr);
669 printf("\nSee minijail0(5) for example policies.\n");
670 }
671
672 /*
673 * Return the next unconsumed option char/value parsed from
674 * |*conf_entry_list|. |optarg| is updated to point to an argument from
675 * the entry value. If all options have been consumed, |*conf_entry_list|
676 * will be freed and -1 will be returned.
677 */
getopt_from_conf(const struct option * longopts,struct config_entry_list ** conf_entry_list,size_t * conf_index)678 static int getopt_from_conf(const struct option *longopts,
679 struct config_entry_list **conf_entry_list,
680 size_t *conf_index)
681 {
682 int opt = -1;
683 /* If we've consumed all the options in the this config, reset it. */
684 if (*conf_index >= (*conf_entry_list)->num_entries) {
685 free_config_entry_list(*conf_entry_list);
686 *conf_entry_list = NULL;
687 *conf_index = 0;
688 return opt;
689 }
690
691 struct config_entry *entry = &(*conf_entry_list)->entries[*conf_index];
692 /* Look up a matching long option. */
693 size_t i = 0;
694 const struct option *curr_opt;
695 for (curr_opt = &longopts[0]; curr_opt->name != NULL;
696 curr_opt = &longopts[++i])
697 if (streq(entry->key, curr_opt->name))
698 break;
699 if (curr_opt->name == NULL) {
700 errx(1,
701 "Unable to recognize '%s' as Minijail conf entry key, "
702 "please refer to minijail0(5) for syntax and examples.",
703 entry->key);
704 }
705 opt = curr_opt->val;
706 optarg = (char *)entry->value;
707 (*conf_index)++;
708 return opt;
709 }
710
711 /*
712 * Similar to getopt(3), return the next option char/value as it
713 * parses through the CLI argument list. Config entries in
714 * |*conf_entry_list| will be parsed with precendences over cli options.
715 * Same as getopt(3), |optarg| is pointing to the option argument.
716 */
getopt_conf_or_cli(int argc,char * const argv[],struct config_entry_list ** conf_entry_list,size_t * conf_index)717 static int getopt_conf_or_cli(int argc, char *const argv[],
718 struct config_entry_list **conf_entry_list,
719 size_t *conf_index)
720 {
721 int opt = -1;
722 if (*conf_entry_list != NULL)
723 opt =
724 getopt_from_conf(long_options, conf_entry_list, conf_index);
725 if (opt == -1)
726 opt = getopt_long(argc, argv, optstring, long_options, NULL);
727 return opt;
728 }
729
/*
 * Handle --env-add: set one variable in the environment the child will get.
 *
 * |arg| must look like VAR=value; it is split in place at the first '='.
 * On the first call (|*envp| still NULL) the child environment is created as
 * a copy of |environ| via minijail_copy_env().  The variable is then set
 * with minijail_setenv(), passing 1 as its last argument.
 *
 * Exits with status 1 if |arg| has no '=', if the copy cannot be made, or if
 * minijail_setenv() fails.
 */
static void set_child_env(char ***envp, char *arg, char *const environ[])
{
	/* We expect VAR=value format for arg. */
	char *separator = strchr(arg, '=');

	if (separator == NULL)
		errx(1, "Expected an argument of the form VAR=value (got '%s')",
		     arg);

	/* Terminate the name; the value starts right after the '='. */
	*separator = '\0';
	const char *value = separator + 1;

	if (*envp == NULL) {
		/*
		 * We got our first --env-add. Initialize *envp by
		 * copying our current env to the future child env.
		 */
		*envp = minijail_copy_env(environ);
		if (*envp == NULL)
			err(1, "Failed to allocate memory.");
	}

	if (minijail_setenv(envp, arg, value, 1) != 0)
		err(1, "minijail_setenv() failed.");
}
752
parse_args(struct minijail * j,int argc,char * const argv[],char * const environ[],int * exit_immediately,ElfType * elftype,const char ** preload_path,char *** envp)753 int parse_args(struct minijail *j, int argc, char *const argv[],
754 char *const environ[], int *exit_immediately,
755 ElfType *elftype, const char **preload_path,
756 char ***envp)
757 {
758 enum seccomp_type { None, Strict, Filter, BpfBinaryFilter };
759 enum seccomp_type seccomp = None;
760 int opt;
761 int use_seccomp_filter = 0;
762 int use_seccomp_filter_binary = 0;
763 int use_seccomp_log = 0;
764 int forward = 1;
765 int binding = 0;
766 int chroot = 0, pivot_root = 0;
767 int mount_ns = 0, change_remount = 0;
768 const char *remount_mode = NULL;
769 int inherit_suppl_gids = 0, keep_suppl_gids = 0;
770 int caps = 0, ambient_caps = 0;
771 bool use_uid = false, use_gid = false;
772 uid_t uid = 0;
773 gid_t gid = 0;
774 gid_t *suppl_gids = NULL;
775 size_t suppl_gids_count = 0;
776 char *uidmap = NULL, *gidmap = NULL;
777 int set_uidmap = 0, set_gidmap = 0;
778 size_t tmp_size = 0;
779 const char *filter_path = NULL;
780 int log_to_stderr = -1;
781 struct config_entry_list *conf_entry_list = NULL;
782 size_t conf_index = 0;
783
784 while ((opt = getopt_conf_or_cli(argc, argv, &conf_entry_list,
785 &conf_index)) != -1) {
786 switch (opt) {
787 case 'u':
788 if (use_uid)
789 errx(1, "-u provided multiple times.");
790 use_uid = true;
791 set_user(j, optarg, &uid, &gid);
792 break;
793 case 'g':
794 if (use_gid)
795 errx(1, "-g provided multiple times.");
796 use_gid = true;
797 set_group(j, optarg, &gid);
798 break;
799 case 'n':
800 minijail_no_new_privs(j);
801 break;
802 case 's':
803 if (seccomp != None && seccomp != Strict) {
804 errx(1, "Do not use -s, -S, or "
805 "--seccomp-bpf-binary together");
806 }
807 seccomp = Strict;
808 minijail_use_seccomp(j);
809 break;
810 case 'S':
811 if (seccomp != None && seccomp != Filter) {
812 errx(1, "Do not use -s, -S, or "
813 "--seccomp-bpf-binary together");
814 }
815 seccomp = Filter;
816 minijail_use_seccomp_filter(j);
817 filter_path = optarg;
818 use_seccomp_filter = 1;
819 break;
820 case 'l':
821 minijail_namespace_ipc(j);
822 break;
823 case 'L':
824 if (seccomp == BpfBinaryFilter) {
825 errx(1, "-L does not work with "
826 "--seccomp-bpf-binary");
827 }
828 use_seccomp_log = 1;
829 minijail_log_seccomp_filter_failures(j);
830 break;
831 case 'b':
832 add_binding(j, optarg);
833 binding = 1;
834 break;
835 case 'B':
836 skip_securebits(j, optarg);
837 break;
838 case 'c':
839 caps = 1;
840 use_caps(j, optarg);
841 break;
842 case 'C':
843 use_chroot(j, optarg, &chroot, pivot_root);
844 break;
845 case 'k':
846 add_mount(j, optarg);
847 break;
848 case 'K':
849 remount_mode = optarg;
850 change_remount = 1;
851 break;
852 case 'P':
853 use_pivot_root(j, optarg, &pivot_root, chroot);
854 break;
855 case 'f':
856 if (0 != minijail_write_pid_file(j, optarg))
857 errx(1, "Could not prepare pid file path");
858 break;
859 case 't':
860 minijail_namespace_vfs(j);
861 if (!tmp_size) {
862 /*
863 * Avoid clobbering |tmp_size| if it was already
864 * set.
865 */
866 tmp_size = DEFAULT_TMP_SIZE;
867 }
868 if (optarg != NULL &&
869 0 != parse_size(&tmp_size, optarg)) {
870 errx(1, "Invalid /tmp tmpfs size");
871 }
872 break;
873 case 'v':
874 minijail_namespace_vfs(j);
875 /*
876 * Set the default mount propagation in the command-line
877 * tool to MS_SLAVE.
878 *
879 * When executing the sandboxed program in a new mount
880 * namespace the Minijail library will by default
881 * remount all mounts with the MS_PRIVATE flag. While
882 * this is an appropriate, safe default for the library,
883 * MS_PRIVATE can be problematic: unmount events will
884 * not propagate into mountpoints marked as MS_PRIVATE.
885 * This means that if a mount is unmounted in the root
886 * mount namespace, it will not be unmounted in the
887 * non-root mount namespace.
888 * This in turn can be problematic because activity in
889 * the non-root mount namespace can now directly
890 * influence the root mount namespace (e.g. preventing
891 * re-mounts of said mount), which would be a privilege
892 * inversion.
893 *
894 * Setting the default in the command-line to MS_SLAVE
895 * will still prevent mounts from leaking out of the
896 * non-root mount namespace but avoid these
897 * privilege-inversion issues.
898 * For cases where mounts should not flow *into* the
899 * namespace either, the user can pass -Kprivate.
900 * Note that mounts are marked as MS_PRIVATE by default
901 * by the kernel, so unless the init process (like
902 * systemd) or something else marks them as shared, this
903 * won't do anything.
904 */
905 minijail_remount_mode(j, MS_SLAVE);
906 mount_ns = 1;
907 break;
908 case 'V':
909 minijail_namespace_enter_vfs(j, optarg);
910 break;
911 case 'r':
912 minijail_remount_proc_readonly(j);
913 break;
914 case 'G':
915 if (keep_suppl_gids)
916 errx(1, "-y and -G are not compatible");
917 minijail_inherit_usergroups(j);
918 inherit_suppl_gids = 1;
919 break;
920 case 'y':
921 if (inherit_suppl_gids)
922 errx(1, "-y and -G are not compatible");
923 minijail_keep_supplementary_gids(j);
924 keep_suppl_gids = 1;
925 break;
926 case 'N':
927 minijail_namespace_cgroups(j);
928 break;
929 case 'p':
930 minijail_namespace_pids(j);
931 break;
932 case 'e':
933 if (optarg)
934 minijail_namespace_enter_net(j, optarg);
935 else
936 minijail_namespace_net(j);
937 break;
938 case 'i':
939 *exit_immediately = 1;
940 break;
941 case 'H':
942 seccomp_filter_usage(argv[0]);
943 exit(0);
944 case 'I':
945 minijail_namespace_pids(j);
946 minijail_run_as_init(j);
947 break;
948 case 'U':
949 minijail_namespace_user(j);
950 minijail_namespace_pids(j);
951 break;
952 case 'm':
953 set_uidmap = 1;
954 if (uidmap) {
955 free(uidmap);
956 uidmap = NULL;
957 }
958 if (optarg)
959 uidmap = xstrdup(optarg);
960 break;
961 case 'M':
962 set_gidmap = 1;
963 if (gidmap) {
964 free(gidmap);
965 gidmap = NULL;
966 }
967 if (optarg)
968 gidmap = xstrdup(optarg);
969 break;
970 case 'a':
971 if (0 != minijail_use_alt_syscall(j, optarg))
972 errx(1, "Could not set alt-syscall table");
973 break;
974 case 'R':
975 add_rlimit(j, optarg);
976 break;
977 case 'T':
978 if (streq(optarg, "static"))
979 *elftype = ELFSTATIC;
980 else if (streq(optarg, "dynamic"))
981 *elftype = ELFDYNAMIC;
982 else {
983 errx(1, "ELF type must be 'static' or "
984 "'dynamic'");
985 }
986 break;
987 case 'w':
988 minijail_new_session_keyring(j);
989 break;
990 case 'Y':
991 minijail_set_seccomp_filter_tsync(j);
992 break;
993 case 'z':
994 forward = 0;
995 break;
996 case 'd':
997 minijail_namespace_vfs(j);
998 minijail_mount_dev(j);
999 break;
1000 /* Long options. */
1001 case OPT_AMBIENT:
1002 ambient_caps = 1;
1003 minijail_set_ambient_caps(j);
1004 break;
1005 case OPT_UTS:
1006 minijail_namespace_uts(j);
1007 if (optarg)
1008 minijail_namespace_set_hostname(j, optarg);
1009 break;
1010 case OPT_LOGGING:
1011 if (streq(optarg, "auto"))
1012 log_to_stderr = -1;
1013 else if (streq(optarg, "syslog"))
1014 log_to_stderr = 0;
1015 else if (streq(optarg, "stderr"))
1016 log_to_stderr = 1;
1017 else
1018 errx(1,
1019 "--logger must be 'syslog' or 'stderr'");
1020 break;
1021 case OPT_PROFILE:
1022 use_profile(j, optarg, &pivot_root, chroot, &tmp_size);
1023 break;
1024 case OPT_PRELOAD_LIBRARY:
1025 *preload_path = optarg;
1026 break;
1027 case OPT_FS_DEFAULT_PATHS:
1028 minijail_enable_default_fs_restrictions(j);
1029 break;
1030 case OPT_FS_PATH_RX:
1031 minijail_add_fs_restriction_rx(j, optarg);
1032 break;
1033 case OPT_FS_PATH_RO:
1034 minijail_add_fs_restriction_ro(j, optarg);
1035 break;
1036 case OPT_FS_PATH_RW:
1037 minijail_add_fs_restriction_rw(j, optarg);
1038 break;
1039 case OPT_FS_PATH_ADVANCED_RW:
1040 minijail_add_fs_restriction_advanced_rw(j, optarg);
1041 break;
1042 case OPT_SECCOMP_BPF_BINARY:
1043 if (seccomp != None && seccomp != BpfBinaryFilter) {
1044 errx(1, "Do not use -s, -S, or "
1045 "--seccomp-bpf-binary together");
1046 }
1047 if (use_seccomp_log == 1)
1048 errx(1, "-L does not work with "
1049 "--seccomp-bpf-binary");
1050 seccomp = BpfBinaryFilter;
1051 minijail_use_seccomp_filter(j);
1052 filter_path = optarg;
1053 use_seccomp_filter_binary = 1;
1054 break;
1055 case OPT_ADD_SUPPL_GROUP:
1056 suppl_group_add(&suppl_gids_count, &suppl_gids, optarg);
1057 break;
1058 case OPT_ALLOW_SPECULATIVE_EXECUTION:
1059 minijail_set_seccomp_filter_allow_speculation(j);
1060 break;
1061 case OPT_CONFIG: {
1062 if (conf_entry_list != NULL) {
1063 errx(1, "Nested config file specification is "
1064 "not allowed.");
1065 }
1066 conf_entry_list = new_config_entry_list();
1067 conf_index = 0;
1068 #if defined(BLOCK_NOEXEC_CONF)
1069 /*
1070 * Check that the conf file is in an exec mount.
1071 * With a W^X invariant, it excludes writable
1072 * mounts.
1073 */
1074 struct statfs conf_statfs;
1075 if (statfs(optarg, &conf_statfs) != 0)
1076 err(1, "statfs(%s) failed.", optarg);
1077 if ((conf_statfs.f_flags & MS_NOEXEC) != 0)
1078 errx(1,
1079 "Conf file must be in a exec "
1080 "mount: %s",
1081 optarg);
1082 #endif
1083 #if defined(ENFORCE_ROOTFS_CONF)
1084 /* Make sure the conf file is on the same device as the
1085 * rootfs. */
1086 struct stat root_stat;
1087 struct stat conf_stat;
1088 if (stat("/", &root_stat) != 0)
1089 err(1, "stat(/) failed.");
1090 if (stat(optarg, &conf_stat) != 0)
1091 err(1, "stat(%s) failed.", optarg);
1092 if (root_stat.st_dev != conf_stat.st_dev)
1093 errx(1, "Conf file must be in the rootfs.");
1094 #endif
1095 attribute_cleanup_fp FILE *config_file =
1096 fopen(optarg, "re");
1097 if (!config_file)
1098 err(1, "Failed to open %s", optarg);
1099 if (!parse_config_file(config_file, conf_entry_list)) {
1100 errx(
1101 1,
1102 "Unable to parse %s as Minijail conf file, "
1103 "please refer to minijail0(5) for syntax "
1104 "and examples.",
1105 optarg);
1106 }
1107 break;
1108 }
1109 case OPT_ENV_ADD:
1110 /*
1111 * We either copy our current env to the child env
1112 * then add the requested envvar to it, or just
1113 * add the requested envvar to the already existing
1114 * envp.
1115 */
1116 set_child_env(envp, optarg, environ);
1117 break;
1118 case OPT_ENV_RESET:
1119 if (*envp && *envp != environ) {
1120 /*
1121 * We already started to initialize the future
1122 * child env, because we got some --env-add
1123 * earlier on the command-line, so first,
1124 * free the memory we allocated.
1125 * If |*envp| happens to point to |environ|,
1126 * don't attempt to free it.
1127 */
1128 minijail_free_env(*envp);
1129 }
1130 /* Allocate an empty environment for the child. */
1131 *envp = calloc(1, sizeof(char *));
1132 if (!*envp)
1133 err(1, "Failed to allocate memory.");
1134 break;
1135 default:
1136 usage(argv[0]);
1137 exit(opt == 'h' ? 0 : 1);
1138 }
1139 }
1140
1141 if (log_to_stderr == -1) {
1142 /* Autodetect default logging output. */
1143 log_to_stderr = isatty(STDIN_FILENO) ? 1 : 0;
1144 }
1145 if (log_to_stderr) {
1146 init_logging(LOG_TO_FD, STDERR_FILENO, LOG_INFO);
1147 /*
1148 * When logging to stderr, ensure the FD survives the jailing.
1149 */
1150 if (0 !=
1151 minijail_preserve_fd(j, STDERR_FILENO, STDERR_FILENO)) {
1152 errx(1, "Could not preserve stderr");
1153 }
1154 }
1155
1156 /* Set up uid/gid mapping. */
1157 if (set_uidmap || set_gidmap) {
1158 set_ugid_mapping(j, set_uidmap, uid, uidmap, set_gidmap, gid,
1159 gidmap);
1160 }
1161
1162 /* Can only set ambient caps when using regular caps. */
1163 if (ambient_caps && !caps) {
1164 errx(1, "Can't set ambient capabilities (--ambient) "
1165 "without actually using capabilities (-c)");
1166 }
1167
1168 /* Set up signal handlers in minijail unless asked not to. */
1169 if (forward)
1170 minijail_forward_signals(j);
1171
1172 /*
1173 * Only allow bind mounts when entering a chroot, using pivot_root, or
1174 * a new mount namespace.
1175 */
1176 if (binding && !(chroot || pivot_root || mount_ns)) {
1177 errx(1, "Bind mounts require a chroot, pivot_root, or "
1178 " new mount namespace");
1179 }
1180
1181 /*
1182 * / is only remounted when entering a new mount namespace, so unless
1183 * that's set there is no need for the -K/-K<mode> flags.
1184 */
1185 if (change_remount && !mount_ns) {
1186 errx(1, "No need to use -K (skip remounting '/') or "
1187 "-K<mode> (remount '/' as <mode>) "
1188 "without -v (new mount namespace).\n"
1189 "Do you need to add '-v' explicitly?");
1190 }
1191
1192 /* Configure the remount flag here to avoid having -v override it. */
1193 if (change_remount) {
1194 if (remount_mode != NULL) {
1195 set_remount_mode(j, remount_mode);
1196 } else {
1197 minijail_skip_remount_private(j);
1198 }
1199 }
1200
1201 /*
1202 * Proceed in setting the supplementary gids specified on the
1203 * cmdline options.
1204 */
1205 if (suppl_gids_count) {
1206 minijail_set_supplementary_gids(j, suppl_gids_count,
1207 suppl_gids);
1208 free(suppl_gids);
1209 }
1210
1211 /*
1212 * We parse seccomp filters here to make sure we've collected all
1213 * cmdline options.
1214 */
1215 if (use_seccomp_filter) {
1216 minijail_parse_seccomp_filters(j, filter_path);
1217 } else if (use_seccomp_filter_binary) {
1218 struct sock_fprog filter;
1219 read_seccomp_filter(filter_path, &filter);
1220 minijail_set_seccomp_filters(j, &filter);
1221 free((void *)filter.filter);
1222 }
1223
1224 /* Mount a tmpfs under /tmp and set its size. */
1225 if (tmp_size)
1226 minijail_mount_tmp_size(j, tmp_size);
1227
1228 /*
1229 * Copy our current env to the child if its |*envp| has not
1230 * already been initialized from --env-(reset|add) usage.
1231 */
1232 if (!*envp) {
1233 *envp = minijail_copy_env(environ);
1234 if (!*envp)
1235 err(1, "Failed to allocate memory.");
1236 }
1237
1238 /*
1239 * There should be at least one additional unparsed argument: the
1240 * executable name.
1241 */
1242 if (argc == optind) {
1243 usage(argv[0]);
1244 exit(1);
1245 }
1246
1247 if (*elftype == ELFERROR) {
1248 /*
1249 * -T was not specified.
1250 * Get the path to the program adjusted for changing root.
1251 */
1252 char *program_path =
1253 minijail_get_original_path(j, argv[optind]);
1254
1255 /* Check that we can access the target program. */
1256 if (access(program_path, X_OK)) {
1257 errx(1, "Target program '%s' is not accessible",
1258 argv[optind]);
1259 }
1260
1261 /* Check if target is statically or dynamically linked. */
1262 *elftype = get_elf_linkage(program_path);
1263 free(program_path);
1264 }
1265
1266 /*
1267 * Setting capabilities needs either a dynamically-linked binary, or the
1268 * use of ambient capabilities for them to be able to survive an
1269 * execve(2).
1270 */
1271 if (caps && *elftype == ELFSTATIC && !ambient_caps) {
1272 errx(1, "Can't run statically-linked binaries with capabilities"
1273 " (-c) without also setting ambient capabilities. "
1274 "Try passing --ambient.");
1275 }
1276
1277 return optind;
1278 }
1279