1 // Copyright 2023 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 // Implementation file for the sandbox2::MonitorBase class.
16
17 #include "sandboxed_api/sandbox2/monitor_base.h"
18
19 #include <pthread.h>
20 #include <sched.h>
21 #include <signal.h>
22 #include <sys/resource.h>
23 #include <syscall.h>
24
25 #include <cerrno>
26 #include <cstdint>
27 #include <cstdio>
28 #include <iomanip>
29 #include <memory>
30 #include <optional>
31 #include <string>
32 #include <utility>
33 #include <vector>
34
35 #include "absl/cleanup/cleanup.h"
36 #include "absl/flags/declare.h"
37 #include "absl/flags/flag.h"
38 #include "absl/log/check.h"
39 #include "absl/log/log.h"
40 #include "absl/status/status.h"
41 #include "absl/status/statusor.h"
42 #include "absl/strings/match.h"
43 #include "absl/strings/str_cat.h"
44 #include "absl/strings/string_view.h"
45 #include "absl/synchronization/notification.h"
46 #include "absl/time/time.h"
47 #include "sandboxed_api/sandbox2/client.h"
48 #include "sandboxed_api/sandbox2/comms.h"
49 #include "sandboxed_api/sandbox2/executor.h"
50 #include "sandboxed_api/sandbox2/limits.h"
51 #include "sandboxed_api/sandbox2/mounts.h"
52 #include "sandboxed_api/sandbox2/namespace.h"
53 #include "sandboxed_api/sandbox2/network_proxy/client.h"
54 #include "sandboxed_api/sandbox2/network_proxy/server.h"
55 #include "sandboxed_api/sandbox2/notify.h"
56 #include "sandboxed_api/sandbox2/policy.h"
57 #include "sandboxed_api/sandbox2/result.h"
58 #include "sandboxed_api/sandbox2/stack_trace.h"
59 #include "sandboxed_api/sandbox2/syscall.h"
60 #include "sandboxed_api/sandbox2/util.h"
61 #include "sandboxed_api/util/file_helpers.h"
62 #include "sandboxed_api/util/raw_logging.h"
63 #include "sandboxed_api/util/strerror.h"
64 #include "sandboxed_api/util/temp_file.h"
65
// Flags defined by this file. Both default to true.
ABSL_FLAG(bool, sandbox2_report_on_sandboxee_signal, true,
          "Report sandbox2 sandboxee deaths caused by signals");

ABSL_FLAG(bool, sandbox2_report_on_sandboxee_timeout, true,
          "Report sandbox2 sandboxee timeouts");

// Flags that are only declared here; their definitions are not part of this
// file.
ABSL_DECLARE_FLAG(bool, sandbox2_danger_danger_permit_all);
// Read by the MonitorBase constructor: a non-empty value is used as the path
// of a log file that is opened in append mode.
ABSL_DECLARE_FLAG(std::string, sandbox2_danger_danger_permit_all_and_log);

// Consulted by MonitorBase::StackTraceCollectionPossible().
ABSL_DECLARE_FLAG(bool, sandbox_libunwind_crash_handler);
76
77 namespace sandbox2 {
78 namespace {
79
MaybeEnableTomoyoLsmWorkaround(Mounts & mounts,std::string & comms_fd_dev)80 void MaybeEnableTomoyoLsmWorkaround(Mounts& mounts, std::string& comms_fd_dev) {
81 static auto tomoyo_active = []() -> bool {
82 std::string lsm_list;
83 if (auto status = sapi::file::GetContents(
84 "/sys/kernel/security/lsm", &lsm_list, sapi::file::Defaults());
85 !status.ok() && !absl::IsNotFound(status)) {
86 VLOG(1) << "Checking active LSMs failed: " << status.message() << ": "
87 << sapi::StrError(errno);
88 return false;
89 }
90 return absl::StrContains(lsm_list, "tomoyo");
91 }();
92
93 if (!tomoyo_active) {
94 return;
95 }
96 VLOG(1) << "Tomoyo LSM active, enabling workaround";
97
98 if (mounts.ResolvePath("/dev").ok() || mounts.ResolvePath("/dev/fd").ok()) {
99 // Avoid shadowing /dev/fd/1022 below if /dev or /dev/fd is already mapped.
100 VLOG(1) << "Parent dir already mapped, skipping";
101 return;
102 }
103
104 auto temp_file = sapi::CreateNamedTempFileAndClose("/tmp/");
105 if (!temp_file.ok()) {
106 LOG(WARNING) << "Failed to create empty temp file: " << temp_file.status();
107 return;
108 }
109 comms_fd_dev = std::move(*temp_file);
110
111 // Ignore errors here, as the file itself might already be mapped.
112 if (auto status = mounts.AddFileAt(
113 comms_fd_dev, absl::StrCat("/dev/fd/", Comms::kSandbox2TargetExecFD),
114 false);
115 !status.ok()) {
116 VLOG(1) << "Mapping comms FD: %s" << status.message();
117 }
118 }
119
LogContainer(const std::vector<std::string> & container)120 void LogContainer(const std::vector<std::string>& container) {
121 for (size_t i = 0; i < container.size(); ++i) {
122 LOG(INFO) << "[" << std::setfill('0') << std::setw(4) << i
123 << "]=" << container[i];
124 }
125 }
126
127 } // namespace
128
MonitorBase(Executor * executor,Policy * policy,Notify * notify)129 MonitorBase::MonitorBase(Executor* executor, Policy* policy, Notify* notify)
130 : executor_(executor),
131 notify_(notify),
132 policy_(policy),
133 // NOLINTNEXTLINE clang-diagnostic-deprecated-declarations
134 comms_(executor_->ipc()->comms()),
135 ipc_(executor_->ipc()),
136 uses_custom_forkserver_(executor_->fork_client_ != nullptr) {
137 // It's a pre-connected Comms channel, no need to accept new connection.
138 CHECK(comms_->IsConnected());
139 std::string path =
140 absl::GetFlag(FLAGS_sandbox2_danger_danger_permit_all_and_log);
141 if (!path.empty()) {
142 log_file_ = std::fopen(path.c_str(), "a+");
143 PCHECK(log_file_ != nullptr) << "Failed to open log file '" << path << "'";
144 }
145
146 if (auto& ns = policy_->namespace_; ns) {
147 // Check for the Tomoyo LSM, which is active by default in several common
148 // distribution kernels (esp. Debian).
149 MaybeEnableTomoyoLsmWorkaround(ns->mounts(), comms_fd_dev_);
150 }
151 }
152
~MonitorBase()153 MonitorBase::~MonitorBase() {
154 if (!comms_fd_dev_.empty()) {
155 std::remove(comms_fd_dev_.c_str());
156 }
157 if (log_file_) {
158 std::fclose(log_file_);
159 }
160 if (network_proxy_server_) {
161 network_proxy_thread_.join();
162 }
163 }
164
OnDone()165 void MonitorBase::OnDone() {
166 if (done_notification_.HasBeenNotified()) {
167 return;
168 }
169
170 notify_->EventFinished(result_);
171 ipc_->InternalCleanupFdMap();
172 done_notification_.Notify();
173 }
174
Launch()175 void MonitorBase::Launch() {
176
177 absl::Cleanup process_cleanup = [this] {
178 if (process_.init_pid > 0) {
179 kill(process_.init_pid, SIGKILL);
180 } else if (process_.main_pid > 0) {
181 kill(process_.main_pid, SIGKILL);
182 }
183 };
184 absl::Cleanup monitor_done = [this] { OnDone(); };
185
186 const Namespace* ns = policy_->GetNamespaceOrNull();
187 if (SAPI_VLOG_IS_ON(1) && ns != nullptr) {
188 std::vector<std::string> outside_entries;
189 std::vector<std::string> inside_entries;
190 ns->mounts().RecursivelyListMounts(
191 /*outside_entries=*/&outside_entries,
192 /*inside_entries=*/&inside_entries);
193 VLOG(1) << "Outside entries mapped to chroot:";
194 LogContainer(outside_entries);
195 VLOG(1) << "Inside entries as they appear in chroot:";
196 LogContainer(inside_entries);
197 }
198
199 // Don't trace the child: it will allow to use 'strace -f' with the whole
200 // sandbox master/monitor, which ptrace_attach'es to the child.
201 int clone_flags = CLONE_UNTRACED;
202
203 if (policy_->allowed_hosts_) {
204 EnableNetworkProxyServer();
205 }
206
207 // Get PID of the sandboxee.
208 bool should_have_init = ns && (ns->clone_flags() & CLONE_NEWPID);
209 absl::StatusOr<SandboxeeProcess> process =
210 executor_->StartSubProcess(clone_flags, ns, type_);
211
212 if (!process.ok()) {
213 LOG(ERROR) << "Starting sandboxed subprocess failed: " << process.status();
214 SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_SUBPROCESS);
215 return;
216 }
217
218 process_ = *std::move(process);
219
220 if (process_.main_pid <= 0 || (should_have_init && process_.init_pid <= 0)) {
221 SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_SUBPROCESS);
222 return;
223 }
224
225 if (!notify_->EventStarted(process_.main_pid, comms_)) {
226 SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_NOTIFY);
227 return;
228 }
229 if (!InitSendIPC()) {
230 SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_IPC);
231 return;
232 }
233 if (!InitSendCwd()) {
234 SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_CWD);
235 return;
236 }
237 if (!InitSendPolicy()) {
238 SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_POLICY);
239 return;
240 }
241 if (!WaitForSandboxReady()) {
242 SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_WAIT);
243 return;
244 }
245 if (!InitApplyLimits()) {
246 SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_LIMITS);
247 return;
248 }
249 std::move(process_cleanup).Cancel();
250
251 RunInternal();
252 std::move(monitor_done).Cancel();
253 }
254
AwaitResultWithTimeout(absl::Duration timeout)255 absl::StatusOr<Result> MonitorBase::AwaitResultWithTimeout(
256 absl::Duration timeout) {
257 auto done = done_notification_.WaitForNotificationWithTimeout(timeout);
258 if (!done) {
259 return absl::DeadlineExceededError("Sandbox did not finish within timeout");
260 }
261
262 Join();
263 return result_;
264 }
265
// Stores `final_status` and `reason_code` in result_.
//
// CHECK-fails if result_ already holds a final status other than
// Result::UNSET, i.e. the status may be set only once.
void MonitorBase::SetExitStatusCode(Result::StatusEnum final_status,
                                    uintptr_t reason_code) {
  CHECK(result_.final_status() == Result::UNSET);
  result_.SetExitStatusCode(final_status, reason_code);
}
271
InitSendPolicy()272 bool MonitorBase::InitSendPolicy() {
273 if (!policy_->SendPolicy(comms_, type_ == FORKSERVER_MONITOR_UNOTIFY)) {
274 LOG(ERROR) << "Couldn't send policy";
275 return false;
276 }
277
278 return true;
279 }
280
InitSendCwd()281 bool MonitorBase::InitSendCwd() {
282 if (!comms_->SendString(executor_->cwd_)) {
283 PLOG(ERROR) << "Couldn't send cwd";
284 return false;
285 }
286
287 return true;
288 }
289
InitApplyLimit(pid_t pid,int resource,const rlimit64 & rlim) const290 bool MonitorBase::InitApplyLimit(pid_t pid, int resource,
291 const rlimit64& rlim) const {
292 #if defined(__ANDROID__)
293 using RlimitResource = int;
294 #else
295 using RlimitResource = __rlimit_resource;
296 #endif
297
298 rlimit64 curr_limit;
299 if (prlimit64(pid, static_cast<RlimitResource>(resource), nullptr,
300 &curr_limit) == -1) {
301 PLOG(ERROR) << "prlimit64(" << pid << ", " << util::GetRlimitName(resource)
302 << ")";
303 } else if (rlim.rlim_cur > curr_limit.rlim_max) {
304 // In such case, don't update the limits, as it will fail. Just stick to the
305 // current ones (which are already lower than intended).
306 LOG(ERROR) << util::GetRlimitName(resource)
307 << ": new.current > current.max (" << rlim.rlim_cur << " > "
308 << curr_limit.rlim_max << "), skipping";
309 return true;
310 }
311
312 if (prlimit64(pid, static_cast<RlimitResource>(resource), &rlim, nullptr) ==
313 -1) {
314 PLOG(ERROR) << "prlimit64(" << pid << ", " << util::GetRlimitName(resource)
315 << ", " << rlim.rlim_cur << ")";
316 return false;
317 }
318
319 return true;
320 }
321
InitApplyLimits()322 bool MonitorBase::InitApplyLimits() {
323 Limits* limits = executor_->limits();
324 return InitApplyLimit(process_.main_pid, RLIMIT_AS, limits->rlimit_as()) &&
325 InitApplyLimit(process_.main_pid, RLIMIT_CPU, limits->rlimit_cpu()) &&
326 InitApplyLimit(process_.main_pid, RLIMIT_FSIZE,
327 limits->rlimit_fsize()) &&
328 InitApplyLimit(process_.main_pid, RLIMIT_NOFILE,
329 limits->rlimit_nofile()) &&
330 InitApplyLimit(process_.main_pid, RLIMIT_CORE, limits->rlimit_core());
331 }
332
InitSendIPC()333 bool MonitorBase::InitSendIPC() { return ipc_->SendFdsOverComms(); }
334
WaitForSandboxReady()335 bool MonitorBase::WaitForSandboxReady() {
336 uint32_t tmp;
337 if (!comms_->RecvUint32(&tmp)) {
338 LOG(ERROR) << "Couldn't receive 'Client::kClient2SandboxReady' message";
339 return false;
340 }
341 if (tmp != Client::kClient2SandboxReady) {
342 LOG(ERROR) << "Received " << tmp << " != Client::kClient2SandboxReady ("
343 << Client::kClient2SandboxReady << ")";
344 return false;
345 }
346 return true;
347 }
348
LogSyscallViolation(const Syscall & syscall) const349 void MonitorBase::LogSyscallViolation(const Syscall& syscall) const {
350 // Do not unwind libunwind.
351 if (executor_->libunwind_sbox_for_pid_ != 0) {
352 LOG(ERROR) << "Sandbox violation during execution of libunwind: "
353 << syscall.GetDescription();
354 return;
355 }
356
357 // So, this is an invalid syscall. Will be killed by seccomp-bpf policies as
358 // well, but we should be on a safe side here as well.
359 LOG(ERROR) << "SANDBOX VIOLATION : PID: " << syscall.pid() << ", PROG: '"
360 << util::GetProgName(syscall.pid())
361 << "' : " << syscall.GetDescription();
362 if (SAPI_VLOG_IS_ON(1)) {
363 VLOG(1) << "Cmdline: " << util::GetCmdLine(syscall.pid());
364 VLOG(1) << "Task Name: " << util::GetProcStatusLine(syscall.pid(), "Name");
365 VLOG(1) << "Tgid: " << util::GetProcStatusLine(syscall.pid(), "Tgid");
366 }
367
368 LogSyscallViolationExplanation(syscall);
369 }
370
LogSyscallViolationExplanation(const Syscall & syscall) const371 void MonitorBase::LogSyscallViolationExplanation(const Syscall& syscall) const {
372 const uintptr_t syscall_nr = syscall.nr();
373 const uintptr_t arg0 = syscall.args()[0];
374
375 // This follows policy in Policy::GetDefaultPolicy - keep it in sync.
376 if (syscall.arch() != Syscall::GetHostArch()) {
377 LOG(ERROR)
378 << "This is a violation because the syscall was issued because the"
379 << " sandboxee and executor architectures are different.";
380 return;
381 }
382 if (syscall_nr == __NR_ptrace) {
383 LOG(ERROR)
384 << "This is a violation because the ptrace syscall would be unsafe in"
385 << " sandbox2, so it has been blocked.";
386 return;
387 }
388 if (syscall_nr == __NR_bpf) {
389 LOG(ERROR)
390 << "This is a violation because the bpf syscall would be risky in"
391 << " a sandbox, so it has been blocked.";
392 return;
393 }
394 if (syscall_nr == __NR_clone && ((arg0 & CLONE_UNTRACED) != 0)) {
395 LOG(ERROR) << "This is a violation because calling clone with CLONE_UNTRACE"
396 << " would be unsafe in sandbox2, so it has been blocked.";
397 return;
398 }
399 }
400
StackTraceCollectionPossible() const401 bool MonitorBase::StackTraceCollectionPossible() const {
402 // Only get the stacktrace if we are not in the libunwind sandbox (avoid
403 // recursion).
404 if ((policy_->GetNamespace() ||
405 absl::GetFlag(FLAGS_sandbox_libunwind_crash_handler) == false) &&
406 executor_->libunwind_recursion_depth() <= 1) {
407 return true;
408 }
409 LOG(ERROR) << "Cannot collect stack trace. Unwind pid "
410 << executor_->libunwind_sbox_for_pid_ << ", namespace "
411 << policy_->GetNamespaceOrNull();
412 return false;
413 }
414
EnableNetworkProxyServer()415 void MonitorBase::EnableNetworkProxyServer() {
416 int fd = ipc_->ReceiveFd(NetworkProxyClient::kFDName);
417
418 network_proxy_server_ = std::make_unique<NetworkProxyServer>(
419 fd, &policy_->allowed_hosts_.value(), pthread_self());
420
421 network_proxy_thread_ = std::thread(&NetworkProxyServer::Run,
422 network_proxy_server_.get());
423 }
424
ShouldCollectStackTrace(Result::StatusEnum status) const425 bool MonitorBase::ShouldCollectStackTrace(Result::StatusEnum status) const {
426 if (!StackTraceCollectionPossible()) {
427 return false;
428 }
429 switch (status) {
430 case Result::EXTERNAL_KILL:
431 return policy_->collect_stacktrace_on_kill_;
432 case Result::TIMEOUT:
433 return policy_->collect_stacktrace_on_timeout_;
434 case Result::SIGNALED:
435 return policy_->collect_stacktrace_on_signal_;
436 case Result::VIOLATION:
437 return policy_->collect_stacktrace_on_violation_;
438 case Result::OK:
439 return policy_->collect_stacktrace_on_exit_;
440 default:
441 return false;
442 }
443 }
444
GetStackTrace(const Regs * regs)445 absl::StatusOr<std::vector<std::string>> MonitorBase::GetStackTrace(
446 const Regs* regs) {
447 return sandbox2::GetStackTrace(regs, policy_->GetNamespaceOrNull(),
448 uses_custom_forkserver_,
449 executor_->libunwind_recursion_depth() + 1);
450 }
451
GetAndLogStackTrace(const Regs * regs)452 absl::StatusOr<std::vector<std::string>> MonitorBase::GetAndLogStackTrace(
453 const Regs* regs) {
454 auto stack_trace = GetStackTrace(regs);
455 if (!stack_trace.ok()) {
456 return stack_trace.status();
457 }
458
459 LOG(INFO) << "Stack trace: [";
460 for (const auto& frame : CompactStackTrace(*stack_trace)) {
461 LOG(INFO) << " " << frame;
462 }
463 LOG(INFO) << "]";
464
465 return stack_trace;
466 }
467 } // namespace sandbox2
468