// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
| |
// Implementation of the sandbox2::Policy class.
| |
| #include "sandboxed_api/sandbox2/policy.h" |
| |
| #include <fcntl.h> |
| #include <linux/audit.h> |
| #include <linux/bpf_common.h> |
| #include <linux/filter.h> |
| #include <linux/seccomp.h> |
| #include <sched.h> |
| #include <syscall.h> |
| |
| #include <cstdint> |
| #include <limits> |
| #include <optional> |
| #include <string> |
| #include <vector> |
| |
| #include "absl/flags/flag.h" |
| #include "absl/log/log.h" |
| #include "absl/strings/string_view.h" |
| #include "sandboxed_api/config.h" |
| #include "sandboxed_api/sandbox2/bpfdisassembler.h" |
| #include "sandboxed_api/sandbox2/comms.h" |
| #include "sandboxed_api/sandbox2/syscall.h" |
| #include "sandboxed_api/sandbox2/util/bpf_helper.h" |
| #include "sandboxed_api/util/raw_logging.h" |
| |
// Fallback definition, used only when the headers included above do not
// already provide SECCOMP_FILTER_FLAG_NEW_LISTENER.
#ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER
#define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3)
#endif

// Fallback definition, used only when the headers included above do not
// already provide SECCOMP_RET_USER_NOTIF.
#ifndef SECCOMP_RET_USER_NOTIF
#define SECCOMP_RET_USER_NOTIF 0x7fc00000U /* notifies userspace */
#endif

// Single BPF return statement whose value is SECCOMP_RET_USER_NOTIF.
// Policy::GetPolicy() substitutes it for every KILL return when the
// seccomp_unotify mode is requested.
#define DO_USER_NOTIF BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_USER_NOTIF)
| |
// Debugging escape hatches. When the first flag is true, or the second one is
// a non-empty string, Policy::GetPolicy() returns the tracking policy
// (GetTrackingPolicy) instead of the default + user policy.
ABSL_FLAG(bool, sandbox2_danger_danger_permit_all, false,
          "Allow all syscalls, useful for testing");
ABSL_FLAG(std::string, sandbox2_danger_danger_permit_all_and_log, "",
          "Allow all syscalls and log them into specified file");
| |
| namespace sandbox2 { |
| |
| // The final policy is the concatenation of: |
| // 1. default policy (GetDefaultPolicy, private), |
| // 2. user policy (user_policy_, public), |
| // 3. default KILL action (avoid failing open if user policy did not do it). |
| std::vector<sock_filter> Policy::GetPolicy(bool user_notif) const { |
| if (absl::GetFlag(FLAGS_sandbox2_danger_danger_permit_all) || |
| !absl::GetFlag(FLAGS_sandbox2_danger_danger_permit_all_and_log).empty()) { |
| return GetTrackingPolicy(); |
| } |
| |
| // Now we can start building the policy. |
| // 1. Start with the default policy (e.g. syscall architecture checks). |
| auto policy = GetDefaultPolicy(user_notif); |
| VLOG(3) << "Default policy:\n" << bpf::Disasm(policy); |
| |
| // 2. Append user policy. |
| VLOG(3) << "User policy:\n" << bpf::Disasm(user_policy_); |
| // Add default syscall_nr loading in case the user forgets. |
| policy.push_back(LOAD_SYSCALL_NR); |
| policy.insert(policy.end(), user_policy_.begin(), user_policy_.end()); |
| |
| // 3. Finish with default KILL action. |
| policy.push_back(KILL); |
| |
| // In seccomp_unotify mode replace all KILLS with unotify |
| if (user_notif) { |
| for (sock_filter& filter : policy) { |
| if (filter.code == BPF_RET + BPF_K && filter.k == SECCOMP_RET_KILL) { |
| filter = DO_USER_NOTIF; |
| } |
| } |
| } |
| |
| VLOG(2) << "Final policy:\n" << bpf::Disasm(policy); |
| return policy; |
| } |
| |
| // If you modify this function, you should also modify. |
| // Monitor::LogAccessViolation to keep them in sync. |
| // |
| // Produces a policy which returns SECCOMP_RET_TRACE instead of SECCOMP_RET_KILL |
| // for the __NR_execve syscall, so the tracer can make a decision to allow or |
| // disallow it depending on which occurrence of __NR_execve it was. |
| // LINT.IfChange |
| std::vector<sock_filter> Policy::GetDefaultPolicy(bool user_notif) const { |
| bpf_labels l = {0}; |
| |
| std::vector<sock_filter> policy; |
| if (user_notif) { |
| policy = { |
| // If compiled arch is different from the runtime one, inform the |
| // Monitor. |
| LOAD_ARCH, |
| JNE32(Syscall::GetHostAuditArch(), DENY), |
| LOAD_SYSCALL_NR, |
| // TODO(b/271400371) Use NOTIF_FLAG_CONTINUE once generally available |
| JNE32(__NR_seccomp, JUMP(&l, past_seccomp_l)), |
| ARG_32(3), |
| JNE32(internal::kExecveMagic, JUMP(&l, past_seccomp_l)), |
| ALLOW, |
| LABEL(&l, past_seccomp_l), |
| LOAD_SYSCALL_NR, |
| JNE32(__NR_execveat, JUMP(&l, past_execveat_l)), |
| ARG_32(4), |
| JNE32(AT_EMPTY_PATH, JUMP(&l, past_execveat_l)), |
| ARG_32(5), |
| JNE32(internal::kExecveMagic, JUMP(&l, past_execveat_l)), |
| ALLOW, |
| LABEL(&l, past_execveat_l), |
| |
| LOAD_SYSCALL_NR, |
| }; |
| } else { |
| policy = { |
| // If compiled arch is different from the runtime one, inform the Monitor. |
| LOAD_ARCH, |
| JEQ32(Syscall::GetHostAuditArch(), JUMP(&l, past_arch_check_l)), |
| #if defined(SAPI_X86_64) |
| JEQ32(AUDIT_ARCH_I386, TRACE(sapi::cpu::kX86)), // 32-bit sandboxee |
| #endif |
| TRACE(sapi::cpu::kUnknown), |
| LABEL(&l, past_arch_check_l), |
| |
| // After the policy is uploaded, forkserver will execve the sandboxee. We |
| // need to allow this execve but not others. Since BPF does not have |
| // state, we need to inform the Monitor to decide, and for that we use a |
| // magic value in syscall args 5. Note that this value is not supposed to |
| // be secret, but just an optimization so that the monitor is not |
| // triggered on every call to execveat. |
| LOAD_SYSCALL_NR, |
| JNE32(__NR_execveat, JUMP(&l, past_execveat_l)), |
| ARG_32(4), |
| JNE32(AT_EMPTY_PATH, JUMP(&l, past_execveat_l)), |
| ARG_32(5), |
| JNE32(internal::kExecveMagic, JUMP(&l, past_execveat_l)), |
| SANDBOX2_TRACE, |
| LABEL(&l, past_execveat_l), |
| |
| LOAD_SYSCALL_NR, |
| }; |
| } |
| |
| // Forbid ptrace because it's unsafe or too risky. The user policy can only |
| // block (i.e. return an error instead of killing the process) but not allow |
| // ptrace. This uses LOAD_SYSCALL_NR from above. |
| if (!user_policy_handles_ptrace_) { |
| policy.insert(policy.end(), {JEQ32(__NR_ptrace, DENY)}); |
| } |
| |
| // If user policy doesn't mention it, then forbid bpf because it's unsafe or |
| // too risky. This uses LOAD_SYSCALL_NR from above. |
| if (!user_policy_handles_bpf_) { |
| policy.insert(policy.end(), {JEQ32(__NR_bpf, DENY)}); |
| } |
| #ifndef CLONE_NEWCGROUP |
| #define CLONE_NEWCGROUP 0x02000000 |
| #endif |
| constexpr uintptr_t kNewNamespacesFlags = |
| CLONE_NEWNS | CLONE_NEWUSER | CLONE_NEWNET | CLONE_NEWUTS | |
| CLONE_NEWCGROUP | CLONE_NEWIPC | CLONE_NEWPID; |
| static_assert(kNewNamespacesFlags <= std::numeric_limits<uint32_t>::max()); |
| constexpr uintptr_t kUnsafeCloneFlags = kNewNamespacesFlags | CLONE_UNTRACED; |
| static_assert(kUnsafeCloneFlags <= std::numeric_limits<uint32_t>::max()); |
| policy.insert(policy.end(), |
| { |
| #ifdef __NR_clone3 |
| // Disallow clone3. Errno instead of DENY so that libraries |
| // can fallback to regular clone/clone2. |
| JEQ32(__NR_clone3, ERRNO(ENOSYS)), |
| #endif |
| // Disallow clone3 and clone with unsafe flags. This uses |
| // LOAD_SYSCALL_NR from above. |
| JNE32(__NR_clone, JUMP(&l, past_clone_unsafe_l)), |
| // Regardless of arch, we only care about the lower 32-bits |
| // of the flags. |
| ARG_32(0), |
| JA32(kUnsafeCloneFlags, DENY), |
| LABEL(&l, past_clone_unsafe_l), |
| // Disallow unshare with unsafe flags. |
| LOAD_SYSCALL_NR, |
| JNE32(__NR_unshare, JUMP(&l, past_unshare_unsafe_l)), |
| // Regardless of arch, we only care about the lower 32-bits |
| // of the flags. |
| ARG_32(0), |
| JA32(kNewNamespacesFlags, DENY), |
| LABEL(&l, past_unshare_unsafe_l), |
| // Disallow seccomp with SECCOMP_FILTER_FLAG_NEW_LISTENER |
| // flag. |
| LOAD_SYSCALL_NR, |
| JNE32(__NR_seccomp, JUMP(&l, past_seccomp_new_listener)), |
| // Regardless of arch, we only care about the lower 32-bits |
| // of the flags. |
| ARG_32(1), |
| JA32(SECCOMP_FILTER_FLAG_NEW_LISTENER, DENY), |
| LABEL(&l, past_seccomp_new_listener), |
| }); |
| |
| if (bpf_resolve_jumps(&l, policy.data(), policy.size()) != 0) { |
| LOG(FATAL) << "Cannot resolve bpf jumps"; |
| } |
| |
| return policy; |
| } |
| // LINT.ThenChange(monitor_ptrace.cc) |
| |
| std::vector<sock_filter> Policy::GetTrackingPolicy() const { |
| return { |
| LOAD_ARCH, |
| #if defined(SAPI_X86_64) |
| JEQ32(AUDIT_ARCH_X86_64, TRACE(sapi::cpu::kX8664)), |
| JEQ32(AUDIT_ARCH_I386, TRACE(sapi::cpu::kX86)), |
| #elif defined(SAPI_PPC64_LE) |
| JEQ32(AUDIT_ARCH_PPC64LE, TRACE(sapi::cpu::kPPC64LE)), |
| #elif defined(SAPI_ARM64) |
| JEQ32(AUDIT_ARCH_AARCH64, TRACE(sapi::cpu::kArm64)), |
| #elif defined(SAPI_ARM) |
| JEQ32(AUDIT_ARCH_ARM, TRACE(sapi::cpu::kArm)), |
| #endif |
| TRACE(sapi::cpu::kUnknown), |
| }; |
| } |
| |
| bool Policy::SendPolicy(Comms* comms, bool user_notif) const { |
| auto policy = GetPolicy(user_notif); |
| if (!comms->SendBytes( |
| reinterpret_cast<uint8_t*>(policy.data()), |
| static_cast<uint64_t>(policy.size()) * sizeof(sock_filter))) { |
| LOG(ERROR) << "Couldn't send policy"; |
| return false; |
| } |
| |
| return true; |
| } |
| |
| void Policy::GetPolicyDescription(PolicyDescription* policy) const { |
| policy->set_user_bpf_policy(user_policy_.data(), |
| user_policy_.size() * sizeof(sock_filter)); |
| if (policy_builder_description_) { |
| *policy->mutable_policy_builder_description() = |
| *policy_builder_description_; |
| } |
| |
| if (namespace_) { |
| namespace_->GetNamespaceDescription( |
| policy->mutable_namespace_description()); |
| } |
| } |
| |
| } // namespace sandbox2 |