xref: /aosp_15_r20/external/sandboxed-api/sandboxed_api/sandbox2/namespace.cc (revision ec63e07ab9515d95e79c211197c445ef84cefa6a)
1 // Copyright 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // Implementation file for the sandbox2::Namespace class.
16 
17 #include "sandboxed_api/sandbox2/namespace.h"
18 
19 #include <fcntl.h>
20 #include <net/if.h>
21 #include <sched.h>
22 #include <sys/ioctl.h>
23 #include <sys/mount.h>
24 #include <sys/socket.h>
25 #include <sys/stat.h>
26 #include <syscall.h>
27 #include <unistd.h>
28 
29 #include <cstdint>
30 #include <cstring>
31 #include <memory>
32 #include <string>
33 #include <utility>
34 #include <vector>
35 
36 #include "absl/strings/str_cat.h"
37 #include "sandboxed_api/sandbox2/violation.pb.h"
38 #include "sandboxed_api/util/fileops.h"
39 #include "sandboxed_api/util/path.h"
40 #include "sandboxed_api/util/raw_logging.h"
41 
42 namespace sandbox2 {
43 
44 namespace file = ::sapi::file;
45 namespace file_util = ::sapi::file_util;
46 
// Directory that is created and used as the tmpfs mount point for the new
// root filesystem (see PrepareChroot() and InitializeInitialNamespaces()).
static constexpr char kSandbox2ChrootPath[] = "/tmp/.sandbox2chroot";
48 
49 namespace {
MountFallbackToReadOnly(const char * source,const char * target,const char * filesystem,uintptr_t flags,const void * data)50 int MountFallbackToReadOnly(const char* source, const char* target,
51                             const char* filesystem, uintptr_t flags,
52                             const void* data) {
53   int rv = mount(source, target, filesystem, flags, data);
54   if (rv != 0 && (flags & MS_RDONLY) == 0) {
55     SAPI_RAW_PLOG(WARNING, "Mounting %s on %s (fs type %s) read-write failed",
56                   source, target, filesystem);
57     rv = mount(source, target, filesystem, flags | MS_RDONLY, data);
58     if (rv == 0) {
59       SAPI_RAW_LOG(INFO, "Mounted %s on %s (fs type %s) as read-only", source,
60                    target, filesystem);
61     }
62   }
63   return rv;
64 }
65 
// Builds the new root filesystem under kSandbox2ChrootPath:
//   1. creates the directory (mode 0700) and mounts a fresh tmpfs on it,
//   2. lets `mounts` create all of its mounts below that directory,
//   3. if `mounts` reports a read-only root, remounts the chroot read-only.
// Each step is guarded by SAPI_RAW_CHECK / SAPI_RAW_PCHECK.
void PrepareChroot(const Mounts& mounts) {
  // Create a tmpfs mount for the new rootfs.
  SAPI_RAW_CHECK(
      file_util::fileops::CreateDirectoryRecursively(kSandbox2ChrootPath, 0700),
      "could not create directory for rootfs");
  SAPI_RAW_PCHECK(mount("none", kSandbox2ChrootPath, "tmpfs", 0, nullptr) == 0,
                  "mounting rootfs failed");

  // Walk the tree and perform all the mount operations.
  mounts.CreateMounts(kSandbox2ChrootPath);

  if (mounts.IsRootReadOnly()) {
    // Remount the chroot read-only. This is a bind remount of the chroot
    // directory onto itself with MS_RDONLY added.
    SAPI_RAW_PCHECK(mount(kSandbox2ChrootPath, kSandbox2ChrootPath, "",
                          MS_BIND | MS_REMOUNT | MS_RDONLY, nullptr) == 0,
                    "remounting chroot read-only failed");
  }
}
84 
TryDenySetgroups()85 void TryDenySetgroups() {
86   file_util::fileops::FDCloser fd(
87       TEMP_FAILURE_RETRY(open("/proc/self/setgroups", O_WRONLY | O_CLOEXEC)));
88   // We ignore errors since they are most likely due to an old kernel.
89   if (fd.get() == -1) {
90     return;
91   }
92 
93   dprintf(fd.get(), "deny");
94 }
95 
WriteIDMap(const char * map_path,int32_t uid)96 void WriteIDMap(const char* map_path, int32_t uid) {
97   file_util::fileops::FDCloser fd(
98       TEMP_FAILURE_RETRY(open(map_path, O_WRONLY | O_CLOEXEC)));
99   SAPI_RAW_PCHECK(fd.get() != -1, "Couldn't open %s", map_path);
100 
101   SAPI_RAW_PCHECK(dprintf(fd.get(), "1000 %d 1", uid) >= 0,
102                   "Could not write %d to %s", uid, map_path);
103 }
104 
SetupIDMaps(uid_t uid,gid_t gid)105 void SetupIDMaps(uid_t uid, gid_t gid) {
106   TryDenySetgroups();
107   WriteIDMap("/proc/self/uid_map", uid);
108   WriteIDMap("/proc/self/gid_map", gid);
109 }
110 
ActivateLoopbackInterface()111 void ActivateLoopbackInterface() {
112   ifreq ifreq;
113 
114   ifreq.ifr_flags = 0;
115   strncpy(ifreq.ifr_name, "lo", IFNAMSIZ);
116 
117   // Create an AF_INET6 socket to perform the IF FLAGS ioctls on.
118   int fd = socket(AF_INET6, SOCK_DGRAM, 0);
119   SAPI_RAW_PCHECK(fd != -1, "creating socket for activating loopback failed");
120 
121   file_util::fileops::FDCloser fd_closer{fd};
122 
123   // First get the existing flags.
124   SAPI_RAW_PCHECK(ioctl(fd, SIOCGIFFLAGS, &ifreq) != -1,
125                   "Getting existing flags");
126 
127   // From 812 kernels, we don't have CAP_NET_ADMIN anymore. But the interface is
128   // already up, so we can skip the next ioctl.
129   if (ifreq.ifr_flags & IFF_UP) {
130     return;
131   }
132 
133   // Set the UP flag and write the flags back.
134   ifreq.ifr_flags |= IFF_UP;
135   SAPI_RAW_PCHECK(ioctl(fd, SIOCSIFFLAGS, &ifreq) != -1, "Setting IFF_UP flag");
136 }
137 
138 // Logs the filesystem contents if verbose logging is enabled.
LogFilesystem(const std::string & dir)139 void LogFilesystem(const std::string& dir) {
140   std::vector<std::string> entries;
141   std::string error;
142   if (!file_util::fileops::ListDirectoryEntries(dir, &entries, &error)) {
143     SAPI_RAW_PLOG(ERROR, "could not list directory entries for %s", dir);
144     return;
145   }
146 
147   for (const auto& entry : entries) {
148     struct stat64 st;
149     std::string full_path = file::JoinPath(dir, entry);
150     if (lstat64(full_path.c_str(), &st) != 0) {
151       SAPI_RAW_PLOG(ERROR, "could not stat %s", full_path);
152       continue;
153     }
154 
155     char ftype;
156     switch (st.st_mode & S_IFMT) {
157       case S_IFREG:
158         ftype = '-';
159         break;
160       case S_IFDIR:
161         ftype = 'd';
162         break;
163       case S_IFLNK:
164         ftype = 'l';
165         break;
166       default:
167         ftype = '?';
168         break;
169     }
170 
171     std::string type_and_mode;
172     type_and_mode += ftype;
173     type_and_mode += st.st_mode & S_IRUSR ? 'r' : '-';
174     type_and_mode += st.st_mode & S_IWUSR ? 'w' : '-';
175     type_and_mode += st.st_mode & S_IXUSR ? 'x' : '-';
176     type_and_mode += st.st_mode & S_IRGRP ? 'r' : '-';
177     type_and_mode += st.st_mode & S_IWGRP ? 'w' : '-';
178     type_and_mode += st.st_mode & S_IXGRP ? 'x' : '-';
179     type_and_mode += st.st_mode & S_IROTH ? 'r' : '-';
180     type_and_mode += st.st_mode & S_IWOTH ? 'w' : '-';
181     type_and_mode += st.st_mode & S_IXOTH ? 'x' : '-';
182 
183     std::string link;
184     if (S_ISLNK(st.st_mode)) {
185       link = absl::StrCat(" -> ", file_util::fileops::ReadLink(full_path));
186     }
187     SAPI_RAW_VLOG(2, "%s %s%s", type_and_mode.c_str(), full_path.c_str(),
188                   link.c_str());
189 
190     if (S_ISDIR(st.st_mode)) {
191       LogFilesystem(full_path);
192     }
193   }
194 }
195 
196 }  // namespace
197 
Namespace(bool allow_unrestricted_networking,Mounts mounts,std::string hostname,bool allow_mount_propagation)198 Namespace::Namespace(bool allow_unrestricted_networking, Mounts mounts,
199                      std::string hostname, bool allow_mount_propagation)
200     : mounts_(std::move(mounts)),
201       hostname_(std::move(hostname)),
202       allow_mount_propagation_(allow_mount_propagation) {
203   if (allow_unrestricted_networking) {
204     clone_flags_ &= ~CLONE_NEWNET;
205   }
206 }
207 
// Sets up the namespaces of the calling process according to `clone_flags`.
//
// Steps, in order:
//   1. If CLONE_NEWUSER is set and `avoid_pivot_root` is false, write the
//      uid/gid maps via SetupIDMaps(uid, gid).
//   2. Return early if CLONE_NEWNS is not set.
//   3. With `avoid_pivot_root`: keep an fd to the current "/" and chroot into
//      "/realroot".
//   4. Mount a new /proc. With CLONE_NEWNET also mount /sys (falling back to
//      read-only), set the hostname and bring up the loopback interface.
//   5. Build the new root with PrepareChroot(mounts).
//   6. Switch to the new root, either via the chroot/MS_MOVE sequence
//      (`avoid_pivot_root`) or via pivot_root() followed by detaching the old
//      root.
//   7. chdir("/") and set the mount propagation of "/" recursively to slave
//      (`allow_mount_propagation`) or private.
//   8. If verbose logging at level 2 is on, dump the resulting filesystem.
//
// Note that in the `avoid_pivot_root` branch the ID maps are written with the
// fixed mapping "1000 1000 1"; `uid` and `gid` are not used there.
// Every step is guarded by SAPI_RAW_CHECK / SAPI_RAW_PCHECK.
void Namespace::InitializeNamespaces(uid_t uid, gid_t gid, int32_t clone_flags,
                                     const Mounts& mounts,
                                     const std::string& hostname,
                                     bool avoid_pivot_root,
                                     bool allow_mount_propagation) {
  // With avoid_pivot_root the ID maps are written later, after the extra
  // unshare() further below.
  if (clone_flags & CLONE_NEWUSER && !avoid_pivot_root) {
    SetupIDMaps(uid, gid);
  }

  if (!(clone_flags & CLONE_NEWNS)) {
    // CLONE_NEWNS is always set if we're running in namespaces.
    return;
  }

  std::unique_ptr<file_util::fileops::FDCloser> root_fd;
  if (avoid_pivot_root) {
    // We want to bind-mount chrooted to real root, so that symlinks work.
    // Reference to main root is kept to escape later from the chroot
    root_fd = std::make_unique<file_util::fileops::FDCloser>(
        TEMP_FAILURE_RETRY(open("/", O_PATH)));
    SAPI_RAW_CHECK(root_fd->get() != -1, "creating fd for main root");

    SAPI_RAW_PCHECK(chroot("/realroot") != -1, "chrooting to real root");
    SAPI_RAW_PCHECK(chdir("/") != -1, "chdir / after chrooting real root");
  }

  SAPI_RAW_PCHECK(mount("", "/proc", "proc", MS_NODEV | MS_NOEXEC | MS_NOSUID,
                        nullptr) != -1,
                  "Could not mount a new /proc"
  );

  if (clone_flags & CLONE_NEWNET) {
    // Some things can only be done if inside a new network namespace, like
    // mounting /sys, setting a hostname or bringing up lo if necessary.

    SAPI_RAW_PCHECK(
        MountFallbackToReadOnly("", "/sys", "sysfs",
                                MS_NODEV | MS_NOEXEC | MS_NOSUID,
                                nullptr) != -1,
        "Could not mount a new /sys"
    );

    SAPI_RAW_PCHECK(sethostname(hostname.c_str(), hostname.size()) != -1,
                    "Could not set network namespace hostname '%s'", hostname);
    ActivateLoopbackInterface();
  }

  // Build the sandboxee's root filesystem under kSandbox2ChrootPath.
  PrepareChroot(mounts);

  if (avoid_pivot_root) {
    // Keep a reference to /proc/self as it might not be mounted later
    file_util::fileops::FDCloser proc_self_fd(
        TEMP_FAILURE_RETRY(open("/proc/self/", O_PATH)));
    SAPI_RAW_PCHECK(proc_self_fd.get() != -1, "opening /proc/self");

    // Return to the main root
    SAPI_RAW_PCHECK(fchdir(root_fd->get()) != -1, "chdir to main root");
    SAPI_RAW_PCHECK(chroot(".") != -1, "chrooting to main root");
    SAPI_RAW_PCHECK(chdir("/") != -1, "chdir / after chrooting main root");

    // Get a reference to /realroot to umount it later. The result of open()
    // is not checked here; a failure surfaces at the fchdir() check below.
    file_util::fileops::FDCloser realroot_fd(
        TEMP_FAILURE_RETRY(open("/realroot", O_PATH)));

    // Move the chroot out of realroot to /
    std::string chroot_path = file::JoinPath("/realroot", kSandbox2ChrootPath);
    SAPI_RAW_PCHECK(chdir(chroot_path.c_str()) != -1, "chdir to chroot");
    SAPI_RAW_PCHECK(mount(".", "/", "", MS_MOVE, nullptr) == 0,
                    "moving rootfs failed");
    SAPI_RAW_PCHECK(chroot(".") != -1, "chrooting moved chroot");
    SAPI_RAW_PCHECK(chdir("/") != -1, "chdir / after chroot");

    // Umount the realroot so that no reference is left
    SAPI_RAW_PCHECK(fchdir(realroot_fd.get()) != -1, "fchdir to /realroot");
    SAPI_RAW_PCHECK(umount2(".", MNT_DETACH) != -1, "detaching old root");

    if (clone_flags & CLONE_NEWUSER) {
      // Also CLONE_NEWNS so that / mount becomes locked
      SAPI_RAW_PCHECK(unshare(CLONE_NEWUSER | CLONE_NEWNS) != -1,
                      "unshare(CLONE_NEWUSER | CLONE_NEWNS)");
      // Setup ID maps using reference to /proc/self obtained earlier
      file_util::fileops::FDCloser setgroups_fd(TEMP_FAILURE_RETRY(
          openat(proc_self_fd.get(), "setgroups", O_WRONLY | O_CLOEXEC)));
      // We ignore errors since they are most likely due to an old kernel.
      if (setgroups_fd.get() != -1) {
        dprintf(setgroups_fd.get(), "deny");
      }
      // Fixed mapping "1000 1000 1" for both the uid and the gid map.
      file_util::fileops::FDCloser uid_map_fd(
          TEMP_FAILURE_RETRY(openat(proc_self_fd.get(), "uid_map", O_WRONLY)));
      SAPI_RAW_PCHECK(uid_map_fd.get() != -1, "Couldn't open uid_map");
      SAPI_RAW_PCHECK(dprintf(uid_map_fd.get(), "1000 1000 1") >= 0,
                      "Could not write uid_map");
      file_util::fileops::FDCloser gid_map_fd(
          TEMP_FAILURE_RETRY(openat(proc_self_fd.get(), "gid_map", O_WRONLY)));
      SAPI_RAW_PCHECK(gid_map_fd.get() != -1, "Couldn't open gid_map");
      SAPI_RAW_PCHECK(dprintf(gid_map_fd.get(), "1000 1000 1") >= 0,
                      "Could not write gid_map");
    }
  } else {
    // This requires some explanation: It's actually possible to pivot_root('/',
    // '/'). After this operation has been completed, the old root is mounted
    // over the new root, and it's OK to simply umount('/') now, and to have
    // new_root as '/'. This allows us not care about providing any special
    // directory for old_root, which is sometimes not easy, given that e.g. /tmp
    // might not always be present inside new_root.
    SAPI_RAW_PCHECK(syscall(__NR_pivot_root, kSandbox2ChrootPath,
                            kSandbox2ChrootPath) != -1,
                    "pivot root");
    SAPI_RAW_PCHECK(umount2("/", MNT_DETACH) != -1, "detaching old root");
  }

  SAPI_RAW_PCHECK(chdir("/") == 0,
                  "changing cwd after mntns initialization failed");

  // Set the propagation type of "/" and everything below it (MS_REC).
  if (allow_mount_propagation) {
    SAPI_RAW_PCHECK(mount("/", "/", "", MS_SLAVE | MS_REC, nullptr) == 0,
                    "changing mount propagation to slave failed");
  } else {
    SAPI_RAW_PCHECK(mount("/", "/", "", MS_PRIVATE | MS_REC, nullptr) == 0,
                    "changing mount propagation to private failed");
  }

  if (SAPI_RAW_VLOG_IS_ON(2)) {
    SAPI_RAW_VLOG(2, "Dumping the sandboxee's filesystem:");
    LogFilesystem("/");
  }
}
335 
// Prepares the initial namespaces of the calling process:
//   1. writes the uid/gid maps via SetupIDMaps(uid, gid),
//   2. creates kSandbox2ChrootPath (mode 0700) and mounts a tmpfs on it,
//   3. creates a "realroot" directory inside that tmpfs,
//   4. pivot_root()s into the tmpfs, passing the "realroot" directory as the
//      location for the old root,
//   5. creates a /proc symlink pointing at /realroot/proc,
//   6. remounts the new "/" read-only.
// Every step after the ID maps is guarded by SAPI_RAW_CHECK / SAPI_RAW_PCHECK.
void Namespace::InitializeInitialNamespaces(uid_t uid, gid_t gid) {
  SetupIDMaps(uid, gid);
  SAPI_RAW_CHECK(
      file_util::fileops::CreateDirectoryRecursively(kSandbox2ChrootPath, 0700),
      "could not create directory for rootfs");
  SAPI_RAW_PCHECK(mount("none", kSandbox2ChrootPath, "tmpfs", 0, nullptr) == 0,
                  "mounting rootfs failed");
  // Presumably resolves to "<chroot path>/realroot"; depends on how
  // file::JoinPath treats the leading slash of the second component.
  auto realroot_path = file::JoinPath(kSandbox2ChrootPath, "/realroot");
  SAPI_RAW_CHECK(
      file_util::fileops::CreateDirectoryRecursively(realroot_path, 0700),
      "could not create directory for real root");
  // new_root = the tmpfs, put_old = the "realroot" directory inside it.
  SAPI_RAW_PCHECK(syscall(__NR_pivot_root, kSandbox2ChrootPath,
                          realroot_path.c_str()) != -1,
                  "pivot root");
  SAPI_RAW_PCHECK(symlink("/realroot/proc", "/proc") != -1, "symlinking /proc");
  // Bind remount of "/" onto itself with MS_RDONLY added.
  SAPI_RAW_PCHECK(
      mount("/", "/", "", MS_BIND | MS_REMOUNT | MS_RDONLY, nullptr) == 0,
      "remounting rootfs read-only failed");
}
355 
GetNamespaceDescription(NamespaceDescription * pb_description) const356 void Namespace::GetNamespaceDescription(
357     NamespaceDescription* pb_description) const {
358   pb_description->set_clone_flags(clone_flags_);
359   *pb_description->mutable_mount_tree_mounts() = mounts_.GetMountTree();
360 }
361 
362 }  // namespace sandbox2
363