xref: /aosp_15_r20/external/sandboxed-api/sandboxed_api/sandbox2/policy.cc (revision ec63e07ab9515d95e79c211197c445ef84cefa6a)
1*ec63e07aSXin Li // Copyright 2019 Google LLC
2*ec63e07aSXin Li //
3*ec63e07aSXin Li // Licensed under the Apache License, Version 2.0 (the "License");
4*ec63e07aSXin Li // you may not use this file except in compliance with the License.
5*ec63e07aSXin Li // You may obtain a copy of the License at
6*ec63e07aSXin Li //
7*ec63e07aSXin Li //     https://www.apache.org/licenses/LICENSE-2.0
8*ec63e07aSXin Li //
9*ec63e07aSXin Li // Unless required by applicable law or agreed to in writing, software
10*ec63e07aSXin Li // distributed under the License is distributed on an "AS IS" BASIS,
11*ec63e07aSXin Li // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12*ec63e07aSXin Li // See the License for the specific language governing permissions and
13*ec63e07aSXin Li // limitations under the License.
14*ec63e07aSXin Li 
15*ec63e07aSXin Li // Implementation of the sandbox2::Policy class.
16*ec63e07aSXin Li 
17*ec63e07aSXin Li #include "sandboxed_api/sandbox2/policy.h"
18*ec63e07aSXin Li 
19*ec63e07aSXin Li #include <fcntl.h>
20*ec63e07aSXin Li #include <linux/audit.h>
21*ec63e07aSXin Li #include <linux/bpf_common.h>
22*ec63e07aSXin Li #include <linux/filter.h>
23*ec63e07aSXin Li #include <linux/seccomp.h>
24*ec63e07aSXin Li #include <sched.h>
25*ec63e07aSXin Li #include <syscall.h>
26*ec63e07aSXin Li 
27*ec63e07aSXin Li #include <cstdint>
28*ec63e07aSXin Li #include <limits>
29*ec63e07aSXin Li #include <optional>
30*ec63e07aSXin Li #include <string>
31*ec63e07aSXin Li #include <vector>
32*ec63e07aSXin Li 
33*ec63e07aSXin Li #include "absl/flags/flag.h"
34*ec63e07aSXin Li #include "absl/log/log.h"
35*ec63e07aSXin Li #include "absl/strings/string_view.h"
36*ec63e07aSXin Li #include "sandboxed_api/config.h"
37*ec63e07aSXin Li #include "sandboxed_api/sandbox2/bpfdisassembler.h"
38*ec63e07aSXin Li #include "sandboxed_api/sandbox2/comms.h"
39*ec63e07aSXin Li #include "sandboxed_api/sandbox2/syscall.h"
40*ec63e07aSXin Li #include "sandboxed_api/sandbox2/util/bpf_helper.h"
41*ec63e07aSXin Li #include "sandboxed_api/util/raw_logging.h"
42*ec63e07aSXin Li 
// Fallback definition, used only when the included kernel headers do not
// already provide SECCOMP_FILTER_FLAG_NEW_LISTENER.
#ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER
#define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3)
#endif

// Fallback definition, used only when the included kernel headers do not
// already provide SECCOMP_RET_USER_NOTIF.
#ifndef SECCOMP_RET_USER_NOTIF
#define SECCOMP_RET_USER_NOTIF 0x7fc00000U /* notifies userspace */
#endif

// BPF return statement whose value is SECCOMP_RET_USER_NOTIF. GetPolicy()
// substitutes it for every SECCOMP_RET_KILL return in seccomp_unotify mode.
#define DO_USER_NOTIF BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_USER_NOTIF)
52*ec63e07aSXin Li 
// Debugging overrides. When the first flag is true, or the second one is
// non-empty, Policy::GetPolicy() returns the tracking policy instead of the
// regular default + user policy.
ABSL_FLAG(bool, sandbox2_danger_danger_permit_all, false,
          "Allow all syscalls, useful for testing");
ABSL_FLAG(std::string, sandbox2_danger_danger_permit_all_and_log, "",
          "Allow all syscalls and log them into specified file");
57*ec63e07aSXin Li 
58*ec63e07aSXin Li namespace sandbox2 {
59*ec63e07aSXin Li 
60*ec63e07aSXin Li // The final policy is the concatenation of:
61*ec63e07aSXin Li //   1. default policy (GetDefaultPolicy, private),
62*ec63e07aSXin Li //   2. user policy (user_policy_, public),
63*ec63e07aSXin Li //   3. default KILL action (avoid failing open if user policy did not do it).
GetPolicy(bool user_notif) const64*ec63e07aSXin Li std::vector<sock_filter> Policy::GetPolicy(bool user_notif) const {
65*ec63e07aSXin Li   if (absl::GetFlag(FLAGS_sandbox2_danger_danger_permit_all) ||
66*ec63e07aSXin Li       !absl::GetFlag(FLAGS_sandbox2_danger_danger_permit_all_and_log).empty()) {
67*ec63e07aSXin Li     return GetTrackingPolicy();
68*ec63e07aSXin Li   }
69*ec63e07aSXin Li 
70*ec63e07aSXin Li   // Now we can start building the policy.
71*ec63e07aSXin Li   // 1. Start with the default policy (e.g. syscall architecture checks).
72*ec63e07aSXin Li   auto policy = GetDefaultPolicy(user_notif);
73*ec63e07aSXin Li   VLOG(3) << "Default policy:\n" << bpf::Disasm(policy);
74*ec63e07aSXin Li 
75*ec63e07aSXin Li   // 2. Append user policy.
76*ec63e07aSXin Li   VLOG(3) << "User policy:\n" << bpf::Disasm(user_policy_);
77*ec63e07aSXin Li   // Add default syscall_nr loading in case the user forgets.
78*ec63e07aSXin Li   policy.push_back(LOAD_SYSCALL_NR);
79*ec63e07aSXin Li   policy.insert(policy.end(), user_policy_.begin(), user_policy_.end());
80*ec63e07aSXin Li 
81*ec63e07aSXin Li   // 3. Finish with default KILL action.
82*ec63e07aSXin Li   policy.push_back(KILL);
83*ec63e07aSXin Li 
84*ec63e07aSXin Li   // In seccomp_unotify mode replace all KILLS with unotify
85*ec63e07aSXin Li   if (user_notif) {
86*ec63e07aSXin Li     for (sock_filter& filter : policy) {
87*ec63e07aSXin Li       if (filter.code == BPF_RET + BPF_K && filter.k == SECCOMP_RET_KILL) {
88*ec63e07aSXin Li         filter = DO_USER_NOTIF;
89*ec63e07aSXin Li       }
90*ec63e07aSXin Li     }
91*ec63e07aSXin Li   }
92*ec63e07aSXin Li 
93*ec63e07aSXin Li   VLOG(2) << "Final policy:\n" << bpf::Disasm(policy);
94*ec63e07aSXin Li   return policy;
95*ec63e07aSXin Li }
96*ec63e07aSXin Li 
97*ec63e07aSXin Li // If you modify this function, you should also modify.
98*ec63e07aSXin Li // Monitor::LogAccessViolation to keep them in sync.
99*ec63e07aSXin Li //
100*ec63e07aSXin Li // Produces a policy which returns SECCOMP_RET_TRACE instead of SECCOMP_RET_KILL
101*ec63e07aSXin Li // for the __NR_execve syscall, so the tracer can make a decision to allow or
102*ec63e07aSXin Li // disallow it depending on which occurrence of __NR_execve it was.
103*ec63e07aSXin Li // LINT.IfChange
GetDefaultPolicy(bool user_notif) const104*ec63e07aSXin Li std::vector<sock_filter> Policy::GetDefaultPolicy(bool user_notif) const {
105*ec63e07aSXin Li   bpf_labels l = {0};
106*ec63e07aSXin Li 
107*ec63e07aSXin Li   std::vector<sock_filter> policy;
108*ec63e07aSXin Li   if (user_notif) {
109*ec63e07aSXin Li     policy = {
110*ec63e07aSXin Li         // If compiled arch is different from the runtime one, inform the
111*ec63e07aSXin Li         // Monitor.
112*ec63e07aSXin Li         LOAD_ARCH,
113*ec63e07aSXin Li         JNE32(Syscall::GetHostAuditArch(), DENY),
114*ec63e07aSXin Li         LOAD_SYSCALL_NR,
115*ec63e07aSXin Li         // TODO(b/271400371) Use NOTIF_FLAG_CONTINUE once generally available
116*ec63e07aSXin Li         JNE32(__NR_seccomp, JUMP(&l, past_seccomp_l)),
117*ec63e07aSXin Li         ARG_32(3),
118*ec63e07aSXin Li         JNE32(internal::kExecveMagic, JUMP(&l, past_seccomp_l)),
119*ec63e07aSXin Li         ALLOW,
120*ec63e07aSXin Li         LABEL(&l, past_seccomp_l),
121*ec63e07aSXin Li         LOAD_SYSCALL_NR,
122*ec63e07aSXin Li         JNE32(__NR_execveat, JUMP(&l, past_execveat_l)),
123*ec63e07aSXin Li         ARG_32(4),
124*ec63e07aSXin Li         JNE32(AT_EMPTY_PATH, JUMP(&l, past_execveat_l)),
125*ec63e07aSXin Li         ARG_32(5),
126*ec63e07aSXin Li         JNE32(internal::kExecveMagic, JUMP(&l, past_execveat_l)),
127*ec63e07aSXin Li         ALLOW,
128*ec63e07aSXin Li         LABEL(&l, past_execveat_l),
129*ec63e07aSXin Li 
130*ec63e07aSXin Li         LOAD_SYSCALL_NR,
131*ec63e07aSXin Li     };
132*ec63e07aSXin Li   } else {
133*ec63e07aSXin Li     policy = {
134*ec63e07aSXin Li       // If compiled arch is different from the runtime one, inform the Monitor.
135*ec63e07aSXin Li       LOAD_ARCH,
136*ec63e07aSXin Li       JEQ32(Syscall::GetHostAuditArch(), JUMP(&l, past_arch_check_l)),
137*ec63e07aSXin Li #if defined(SAPI_X86_64)
138*ec63e07aSXin Li       JEQ32(AUDIT_ARCH_I386, TRACE(sapi::cpu::kX86)),  // 32-bit sandboxee
139*ec63e07aSXin Li #endif
140*ec63e07aSXin Li       TRACE(sapi::cpu::kUnknown),
141*ec63e07aSXin Li       LABEL(&l, past_arch_check_l),
142*ec63e07aSXin Li 
143*ec63e07aSXin Li       // After the policy is uploaded, forkserver will execve the sandboxee. We
144*ec63e07aSXin Li       // need to allow this execve but not others. Since BPF does not have
145*ec63e07aSXin Li       // state, we need to inform the Monitor to decide, and for that we use a
146*ec63e07aSXin Li       // magic value in syscall args 5. Note that this value is not supposed to
147*ec63e07aSXin Li       // be secret, but just an optimization so that the monitor is not
148*ec63e07aSXin Li       // triggered on every call to execveat.
149*ec63e07aSXin Li       LOAD_SYSCALL_NR,
150*ec63e07aSXin Li       JNE32(__NR_execveat, JUMP(&l, past_execveat_l)),
151*ec63e07aSXin Li       ARG_32(4),
152*ec63e07aSXin Li       JNE32(AT_EMPTY_PATH, JUMP(&l, past_execveat_l)),
153*ec63e07aSXin Li       ARG_32(5),
154*ec63e07aSXin Li       JNE32(internal::kExecveMagic, JUMP(&l, past_execveat_l)),
155*ec63e07aSXin Li       SANDBOX2_TRACE,
156*ec63e07aSXin Li       LABEL(&l, past_execveat_l),
157*ec63e07aSXin Li 
158*ec63e07aSXin Li       LOAD_SYSCALL_NR,
159*ec63e07aSXin Li     };
160*ec63e07aSXin Li   }
161*ec63e07aSXin Li 
162*ec63e07aSXin Li   // Forbid ptrace because it's unsafe or too risky. The user policy can only
163*ec63e07aSXin Li   // block (i.e. return an error instead of killing the process) but not allow
164*ec63e07aSXin Li   // ptrace. This uses LOAD_SYSCALL_NR from above.
165*ec63e07aSXin Li   if (!user_policy_handles_ptrace_) {
166*ec63e07aSXin Li     policy.insert(policy.end(), {JEQ32(__NR_ptrace, DENY)});
167*ec63e07aSXin Li   }
168*ec63e07aSXin Li 
169*ec63e07aSXin Li   // If user policy doesn't mention it, then forbid bpf because it's unsafe or
170*ec63e07aSXin Li   // too risky.  This uses LOAD_SYSCALL_NR from above.
171*ec63e07aSXin Li   if (!user_policy_handles_bpf_) {
172*ec63e07aSXin Li     policy.insert(policy.end(), {JEQ32(__NR_bpf, DENY)});
173*ec63e07aSXin Li   }
174*ec63e07aSXin Li #ifndef CLONE_NEWCGROUP
175*ec63e07aSXin Li #define CLONE_NEWCGROUP 0x02000000
176*ec63e07aSXin Li #endif
177*ec63e07aSXin Li   constexpr uintptr_t kNewNamespacesFlags =
178*ec63e07aSXin Li       CLONE_NEWNS | CLONE_NEWUSER | CLONE_NEWNET | CLONE_NEWUTS |
179*ec63e07aSXin Li       CLONE_NEWCGROUP | CLONE_NEWIPC | CLONE_NEWPID;
180*ec63e07aSXin Li   static_assert(kNewNamespacesFlags <= std::numeric_limits<uint32_t>::max());
181*ec63e07aSXin Li   constexpr uintptr_t kUnsafeCloneFlags = kNewNamespacesFlags | CLONE_UNTRACED;
182*ec63e07aSXin Li   static_assert(kUnsafeCloneFlags <= std::numeric_limits<uint32_t>::max());
183*ec63e07aSXin Li   policy.insert(policy.end(),
184*ec63e07aSXin Li                 {
185*ec63e07aSXin Li #ifdef __NR_clone3
186*ec63e07aSXin Li                     // Disallow clone3. Errno instead of DENY so that libraries
187*ec63e07aSXin Li                     // can fallback to regular clone/clone2.
188*ec63e07aSXin Li                     JEQ32(__NR_clone3, ERRNO(ENOSYS)),
189*ec63e07aSXin Li #endif
190*ec63e07aSXin Li                     // Disallow clone3 and clone with unsafe flags.  This uses
191*ec63e07aSXin Li                     // LOAD_SYSCALL_NR from above.
192*ec63e07aSXin Li                     JNE32(__NR_clone, JUMP(&l, past_clone_unsafe_l)),
193*ec63e07aSXin Li                     // Regardless of arch, we only care about the lower 32-bits
194*ec63e07aSXin Li                     // of the flags.
195*ec63e07aSXin Li                     ARG_32(0),
196*ec63e07aSXin Li                     JA32(kUnsafeCloneFlags, DENY),
197*ec63e07aSXin Li                     LABEL(&l, past_clone_unsafe_l),
198*ec63e07aSXin Li                     // Disallow unshare with unsafe flags.
199*ec63e07aSXin Li                     LOAD_SYSCALL_NR,
200*ec63e07aSXin Li                     JNE32(__NR_unshare, JUMP(&l, past_unshare_unsafe_l)),
201*ec63e07aSXin Li                     // Regardless of arch, we only care about the lower 32-bits
202*ec63e07aSXin Li                     // of the flags.
203*ec63e07aSXin Li                     ARG_32(0),
204*ec63e07aSXin Li                     JA32(kNewNamespacesFlags, DENY),
205*ec63e07aSXin Li                     LABEL(&l, past_unshare_unsafe_l),
206*ec63e07aSXin Li                     // Disallow seccomp with SECCOMP_FILTER_FLAG_NEW_LISTENER
207*ec63e07aSXin Li                     // flag.
208*ec63e07aSXin Li                     LOAD_SYSCALL_NR,
209*ec63e07aSXin Li                     JNE32(__NR_seccomp, JUMP(&l, past_seccomp_new_listener)),
210*ec63e07aSXin Li                     // Regardless of arch, we only care about the lower 32-bits
211*ec63e07aSXin Li                     // of the flags.
212*ec63e07aSXin Li                     ARG_32(1),
213*ec63e07aSXin Li                     JA32(SECCOMP_FILTER_FLAG_NEW_LISTENER, DENY),
214*ec63e07aSXin Li                     LABEL(&l, past_seccomp_new_listener),
215*ec63e07aSXin Li                 });
216*ec63e07aSXin Li 
217*ec63e07aSXin Li   if (bpf_resolve_jumps(&l, policy.data(), policy.size()) != 0) {
218*ec63e07aSXin Li     LOG(FATAL) << "Cannot resolve bpf jumps";
219*ec63e07aSXin Li   }
220*ec63e07aSXin Li 
221*ec63e07aSXin Li   return policy;
222*ec63e07aSXin Li }
223*ec63e07aSXin Li // LINT.ThenChange(monitor_ptrace.cc)
224*ec63e07aSXin Li 
GetTrackingPolicy() const225*ec63e07aSXin Li std::vector<sock_filter> Policy::GetTrackingPolicy() const {
226*ec63e07aSXin Li   return {
227*ec63e07aSXin Li     LOAD_ARCH,
228*ec63e07aSXin Li #if defined(SAPI_X86_64)
229*ec63e07aSXin Li         JEQ32(AUDIT_ARCH_X86_64, TRACE(sapi::cpu::kX8664)),
230*ec63e07aSXin Li         JEQ32(AUDIT_ARCH_I386, TRACE(sapi::cpu::kX86)),
231*ec63e07aSXin Li #elif defined(SAPI_PPC64_LE)
232*ec63e07aSXin Li         JEQ32(AUDIT_ARCH_PPC64LE, TRACE(sapi::cpu::kPPC64LE)),
233*ec63e07aSXin Li #elif defined(SAPI_ARM64)
234*ec63e07aSXin Li         JEQ32(AUDIT_ARCH_AARCH64, TRACE(sapi::cpu::kArm64)),
235*ec63e07aSXin Li #elif defined(SAPI_ARM)
236*ec63e07aSXin Li         JEQ32(AUDIT_ARCH_ARM, TRACE(sapi::cpu::kArm)),
237*ec63e07aSXin Li #endif
238*ec63e07aSXin Li         TRACE(sapi::cpu::kUnknown),
239*ec63e07aSXin Li   };
240*ec63e07aSXin Li }
241*ec63e07aSXin Li 
SendPolicy(Comms * comms,bool user_notif) const242*ec63e07aSXin Li bool Policy::SendPolicy(Comms* comms, bool user_notif) const {
243*ec63e07aSXin Li   auto policy = GetPolicy(user_notif);
244*ec63e07aSXin Li   if (!comms->SendBytes(
245*ec63e07aSXin Li           reinterpret_cast<uint8_t*>(policy.data()),
246*ec63e07aSXin Li           static_cast<uint64_t>(policy.size()) * sizeof(sock_filter))) {
247*ec63e07aSXin Li     LOG(ERROR) << "Couldn't send policy";
248*ec63e07aSXin Li     return false;
249*ec63e07aSXin Li   }
250*ec63e07aSXin Li 
251*ec63e07aSXin Li   return true;
252*ec63e07aSXin Li }
253*ec63e07aSXin Li 
GetPolicyDescription(PolicyDescription * policy) const254*ec63e07aSXin Li void Policy::GetPolicyDescription(PolicyDescription* policy) const {
255*ec63e07aSXin Li   policy->set_user_bpf_policy(user_policy_.data(),
256*ec63e07aSXin Li                               user_policy_.size() * sizeof(sock_filter));
257*ec63e07aSXin Li   if (policy_builder_description_) {
258*ec63e07aSXin Li     *policy->mutable_policy_builder_description() =
259*ec63e07aSXin Li         *policy_builder_description_;
260*ec63e07aSXin Li   }
261*ec63e07aSXin Li 
262*ec63e07aSXin Li   if (namespace_) {
263*ec63e07aSXin Li     namespace_->GetNamespaceDescription(
264*ec63e07aSXin Li         policy->mutable_namespace_description());
265*ec63e07aSXin Li   }
266*ec63e07aSXin Li }
267*ec63e07aSXin Li 
268*ec63e07aSXin Li }  // namespace sandbox2
269