1 // Copyright 2023 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 // Implementation file for the sandbox2::PtraceMonitor class.
16
17 #include "sandboxed_api/sandbox2/monitor_ptrace.h"
18
19 #include <sys/ptrace.h>
20 #include <sys/resource.h>
21 #include <sys/wait.h>
22 #include <syscall.h>
23 #include <unistd.h>
24
25 #include <algorithm>
26 #include <atomic>
27 #include <cerrno>
28 #include <cstdint>
29 #include <ctime>
30 #include <deque>
31 #include <fstream>
32 #include <ios>
33 #include <memory>
34 #include <sstream>
35 #include <string>
36 #include <utility>
37 #include <vector>
38
39 #include "absl/base/optimization.h"
40 #include "absl/cleanup/cleanup.h"
41 #include "absl/container/flat_hash_map.h"
42 #include "absl/container/flat_hash_set.h"
43 #include "absl/flags/declare.h"
44 #include "absl/flags/flag.h"
45 #include "absl/log/check.h"
46 #include "absl/log/log.h"
47 #include "absl/status/status.h"
48 #include "absl/status/statusor.h"
49 #include "absl/strings/str_cat.h"
50 #include "absl/strings/str_format.h"
51 #include "absl/strings/string_view.h"
52 #include "absl/synchronization/mutex.h"
53 #include "absl/synchronization/notification.h"
54 #include "absl/time/clock.h"
55 #include "absl/time/time.h"
56 #include "sandboxed_api/config.h"
57 #include "sandboxed_api/sandbox2/client.h"
58 #include "sandboxed_api/sandbox2/comms.h"
59 #include "sandboxed_api/sandbox2/executor.h"
60 #include "sandboxed_api/sandbox2/notify.h"
61 #include "sandboxed_api/sandbox2/policy.h"
62 #include "sandboxed_api/sandbox2/regs.h"
63 #include "sandboxed_api/sandbox2/result.h"
64 #include "sandboxed_api/sandbox2/sanitizer.h"
65 #include "sandboxed_api/sandbox2/syscall.h"
66 #include "sandboxed_api/sandbox2/util.h"
67 #include "sandboxed_api/util/raw_logging.h"
68 #include "sandboxed_api/util/status_macros.h"
69
// Opt-in switch (off by default) that makes the monitor log stack traces for
// monitored threads/processes reported to terminate with a signal. It is read
// by PtraceMonitor::Run() and PtraceMonitor::EventPtraceExit().
ABSL_FLAG(bool, sandbox2_log_all_stack_traces, false,
          "If set, sandbox2 monitor will log stack traces of all monitored "
          "threads/processes that are reported to terminate with a signal.");
73
74 ABSL_FLAG(absl::Duration, sandbox2_stack_traces_collection_timeout,
75 absl::Seconds(1),
76 "How much time should be spent on logging threads' stack traces on "
77 "monitor shut down. Only relevent when collection of all stack "
78 "traces is enabled.");
79
// Declaration only; the flag is defined elsewhere. When it is set,
// PtraceMonitor::ActionProcessSyscall() logs and continues syscalls instead of
// reporting them as violations.
ABSL_DECLARE_FLAG(bool, sandbox2_danger_danger_permit_all);
81
82 namespace sandbox2 {
83 namespace {
84
// waitpid() favours newer threads, so a handful of noisy newcomers could
// starve older threads of attention. PidWaiter counters that by collecting the
// pending wait statuses into a queue and handing them out one per Wait() call,
// which gives every waiting thread its turn.
class PidWaiter {
 public:
  // Creates a waiter that polls `priority_pid` ahead of everything else.
  explicit PidWaiter(pid_t priority_pid) : priority_pid_(priority_pid) {}

  // Hands out the next queued event.
  //
  // On success returns the PID the event belongs to and stores the raw
  // waitpid() status in `*status`. Returns 0 when no thread needs attention
  // right now, or -1 on error, in which case `errno` holds the error recorded
  // from waitpid().
  int Wait(int* status) {
    RefillStatuses();

    if (!statuses_.empty()) {
      const std::pair<pid_t, int> next = statuses_.front();
      statuses_.pop_front();
      *status = next.second;
      return next.first;
    }

    if (last_errno_ != 0) {
      // Surface the remembered error once, then forget it.
      errno = last_errno_;
      last_errno_ = 0;
      return -1;
    }
    return 0;
  }

 private:
  // Polls `pid` once. WNOHANG keeps the call non-blocking, so this returns
  // quickly when there is nothing to process.
  //
  // Returns false only when waitpid() had nothing to report. Returns true when
  // an event was queued or when an error was recorded in `last_errno_`.
  bool CheckStatus(pid_t pid) {
    int wait_status;
    const pid_t waited =
        waitpid(pid, &wait_status, __WNOTHREAD | __WALL | WUNTRACED | WNOHANG);
    if (waited == 0) {
      return false;
    }
    if (waited < 0) {
      last_errno_ = errno;
    } else {
      statuses_.emplace_back(waited, wait_status);
    }
    return true;
  }

  // Fills the queue if (and only if) it is empty. The priority PID is polled
  // on every kPriorityCheckPeriod-th iteration, and a hit there (event or
  // error) ends the refill immediately. Otherwise any child is polled until
  // nothing is pending, an error is recorded, or kMaxIterations is reached.
  void RefillStatuses() {
    constexpr int kMaxIterations = 1000;
    constexpr int kPriorityCheckPeriod = 100;
    if (!statuses_.empty()) {
      return;
    }
    for (int iteration = 0; iteration < kMaxIterations; ++iteration) {
      if (last_errno_ != 0) {
        return;
      }
      const bool priority_turn = (iteration % kPriorityCheckPeriod) == 0;
      if (priority_turn && CheckStatus(priority_pid_)) {
        return;
      }
      if (!CheckStatus(-1)) {
        return;
      }
    }
  }

  pid_t priority_pid_;
  std::deque<std::pair<pid_t, int>> statuses_ = {};
  int last_errno_ = 0;
};
155
156 // We could use the ProcMapsIterator, however we want the full file content.
ReadProcMaps(pid_t pid)157 std::string ReadProcMaps(pid_t pid) {
158 std::ifstream input(absl::StrCat("/proc/", pid, "/maps"),
159 std::ios_base::in | std::ios_base::binary);
160 std::ostringstream contents;
161 contents << input.rdbuf();
162 return contents.str();
163 }
164
ContinueProcess(pid_t pid,int signo)165 void ContinueProcess(pid_t pid, int signo) {
166 if (ptrace(PTRACE_CONT, pid, 0, signo) == -1) {
167 if (errno == ESRCH) {
168 LOG(WARNING) << "Process " << pid
169 << " died while trying to PTRACE_CONT it";
170 } else {
171 PLOG(ERROR) << "ptrace(PTRACE_CONT, pid=" << pid << ", sig=" << signo
172 << ")";
173 }
174 }
175 }
176
StopProcess(pid_t pid,int signo)177 void StopProcess(pid_t pid, int signo) {
178 if (ptrace(PTRACE_LISTEN, pid, 0, signo) == -1) {
179 if (errno == ESRCH) {
180 LOG(WARNING) << "Process " << pid
181 << " died while trying to PTRACE_LISTEN it";
182 } else {
183 PLOG(ERROR) << "ptrace(PTRACE_LISTEN, pid=" << pid << ", sig=" << signo
184 << ")";
185 }
186 }
187 }
188
CompleteSyscall(pid_t pid,int signo)189 void CompleteSyscall(pid_t pid, int signo) {
190 if (ptrace(PTRACE_SYSCALL, pid, 0, signo) == -1) {
191 if (errno == ESRCH) {
192 LOG(WARNING) << "Process " << pid
193 << " died while trying to PTRACE_SYSCALL it";
194 } else {
195 PLOG(ERROR) << "ptrace(PTRACE_SYSCALL, pid=" << pid << ", sig=" << signo
196 << ")";
197 }
198 }
199 }
200
201 } // namespace
202
PtraceMonitor(Executor * executor,Policy * policy,Notify * notify)203 PtraceMonitor::PtraceMonitor(Executor* executor, Policy* policy, Notify* notify)
204 : MonitorBase(executor, policy, notify),
205 wait_for_execve_(executor->enable_sandboxing_pre_execve_) {
206 if (executor_->limits()->wall_time_limit() != absl::ZeroDuration()) {
207 auto deadline = absl::Now() + executor_->limits()->wall_time_limit();
208 deadline_millis_.store(absl::ToUnixMillis(deadline),
209 std::memory_order_relaxed);
210 }
211 external_kill_request_flag_.test_and_set(std::memory_order_relaxed);
212 dump_stack_request_flag_.test_and_set(std::memory_order_relaxed);
213 }
214
IsActivelyMonitoring()215 bool PtraceMonitor::IsActivelyMonitoring() {
216 // If we're still waiting for execve(), then we allow all syscalls.
217 return !wait_for_execve_;
218 }
219
SetActivelyMonitoring()220 void PtraceMonitor::SetActivelyMonitoring() { wait_for_execve_ = false; }
221
SetAdditionalResultInfo(std::unique_ptr<Regs> regs)222 void PtraceMonitor::SetAdditionalResultInfo(std::unique_ptr<Regs> regs) {
223 pid_t pid = regs->pid();
224 result_.SetRegs(std::move(regs));
225 result_.SetProgName(util::GetProgName(pid));
226 result_.SetProcMaps(ReadProcMaps(pid));
227 if (!ShouldCollectStackTrace(result_.final_status())) {
228 VLOG(1) << "Stack traces have been disabled";
229 return;
230 }
231
232 absl::StatusOr<std::vector<std::string>> stack_trace =
233 GetAndLogStackTrace(result_.GetRegs());
234 if (!stack_trace.ok()) {
235 LOG(ERROR) << "Could not obtain stack trace: " << stack_trace.status();
236 return;
237 }
238 result_.set_stack_trace(*stack_trace);
239 }
240
KillSandboxee()241 bool PtraceMonitor::KillSandboxee() {
242 VLOG(1) << "Sending SIGKILL to the PID: " << process_.main_pid;
243 if (kill(process_.main_pid, SIGKILL) != 0) {
244 PLOG(ERROR) << "Could not send SIGKILL to PID " << process_.main_pid;
245 SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_KILL);
246 return false;
247 }
248 constexpr absl::Duration kGracefullKillTimeout = absl::Milliseconds(1000);
249 if (hard_deadline_ == absl::InfiniteFuture()) {
250 hard_deadline_ = absl::Now() + kGracefullKillTimeout;
251 }
252 return true;
253 }
254
InterruptSandboxee()255 bool PtraceMonitor::InterruptSandboxee() {
256 if (ptrace(PTRACE_INTERRUPT, process_.main_pid, 0, 0) == -1) {
257 PLOG(ERROR) << "Could not send interrupt to pid=" << process_.main_pid;
258 SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_INTERRUPT);
259 return false;
260 }
261 return true;
262 }
263
// Extracts bits 16-23 of a wait status, which this file compares against
// PTRACE_EVENT_* values. Not defined in glibc.
// The argument is parenthesized so that an expression such as `a | b` is
// masked as a whole rather than being split by operator precedence.
#define __WPTRACEEVENT(x) (((x) & 0xff0000) >> 16)
266
NotifyMonitor()267 void PtraceMonitor::NotifyMonitor() {
268 absl::ReaderMutexLock lock(¬ify_mutex_);
269 if (thread_ != nullptr) {
270 pthread_kill(thread_->native_handle(), SIGCHLD);
271 }
272 }
273
Join()274 void PtraceMonitor::Join() {
275 absl::MutexLock lock(¬ify_mutex_);
276 if (thread_) {
277 thread_->join();
278 CHECK(IsDone()) << "Monitor did not terminate";
279 VLOG(1) << "Final execution status: " << result_.ToString();
280 CHECK(result_.final_status() != Result::UNSET);
281 thread_.reset();
282 }
283 }
284
RunInternal()285 void PtraceMonitor::RunInternal() {
286 thread_ = std::make_unique<std::thread>(&PtraceMonitor::Run, this);
287
288 // Wait for the Monitor to set-up the sandboxee correctly (or fail while
289 // doing that). From here on, it is safe to use the IPC object for
290 // non-sandbox-related data exchange.
291 setup_notification_.WaitForNotification();
292 }
293
Run()294 void PtraceMonitor::Run() {
295 absl::Cleanup monitor_done = [this] {
296 getrusage(RUSAGE_THREAD, result_.GetRUsageMonitor());
297 OnDone();
298 };
299
300 absl::Cleanup setup_notify = [this] { setup_notification_.Notify(); };
301 // It'd be costly to initialize the sigset_t for each sigtimedwait()
302 // invocation, so do it once per Monitor.
303 if (!InitSetupSignals()) {
304 SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_SIGNALS);
305 return;
306 }
307 // This call should be the last in the init sequence, because it can cause the
308 // sandboxee to enter ptrace-stopped state, in which it will not be able to
309 // send any messages over the Comms channel.
310 if (!InitPtraceAttach()) {
311 SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_PTRACE);
312 return;
313 }
314
315 // Tell the parent thread (Sandbox2 object) that we're done with the initial
316 // set-up process of the sandboxee.
317 std::move(setup_notify).Invoke();
318
319 bool sandboxee_exited = false;
320 PidWaiter pid_waiter(process_.main_pid);
321 int status;
322 // All possible still running children of main process, will be killed due to
323 // PTRACE_O_EXITKILL ptrace() flag.
324 while (result().final_status() == Result::UNSET) {
325 if (absl::Now() >= hard_deadline_) {
326 LOG(WARNING) << "Hard deadline exceeded (timed_out=" << timed_out_
327 << ", external_kill=" << external_kill_
328 << ", network_violation=" << network_violation_ << ").";
329 SetExitStatusCode(Result::TIMEOUT, 0);
330 break;
331 }
332 int64_t deadline = deadline_millis_.load(std::memory_order_relaxed);
333 if (deadline != 0 && absl::Now() >= absl::FromUnixMillis(deadline)) {
334 VLOG(1) << "Sandbox process hit timeout due to the walltime timer";
335 timed_out_ = true;
336 if (!KillSandboxee()) {
337 break;
338 }
339 }
340
341 if (!dump_stack_request_flag_.test_and_set(std::memory_order_relaxed)) {
342 should_dump_stack_ = true;
343 if (!InterruptSandboxee()) {
344 break;
345 }
346 }
347
348 if (!external_kill_request_flag_.test_and_set(std::memory_order_relaxed)) {
349 external_kill_ = true;
350 if (!KillSandboxee()) {
351 break;
352 }
353 }
354
355 if (network_proxy_server_ &&
356 network_proxy_server_->violation_occurred_.load(
357 std::memory_order_acquire) &&
358 !network_violation_) {
359 network_violation_ = true;
360 if (!KillSandboxee()) {
361 break;
362 }
363 }
364
365 pid_t ret = pid_waiter.Wait(&status);
366 if (ret == 0) {
367 constexpr timespec ts = {kWakeUpPeriodSec, kWakeUpPeriodNSec};
368 int signo = sigtimedwait(&sset_, nullptr, &ts);
369 LOG_IF(ERROR, signo != -1 && signo != SIGCHLD)
370 << "Unknown signal received: " << signo;
371 continue;
372 }
373
374 if (ret == -1) {
375 if (errno == ECHILD) {
376 LOG(ERROR) << "PANIC(). The main process has not exited yet, "
377 << "yet we haven't seen its exit event";
378 SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_CHILD);
379 } else {
380 PLOG(ERROR) << "waitpid() failed";
381 }
382 continue;
383 }
384
385 VLOG(3) << "waitpid() returned with PID: " << ret << ", status: " << status;
386
387 if (WIFEXITED(status)) {
388 VLOG(1) << "PID: " << ret
389 << " finished with code: " << WEXITSTATUS(status);
390 // That's the main process, set the exit code, and exit. It will kill
391 // all remaining processes (if there are any) because of the
392 // PTRACE_O_EXITKILL ptrace() flag.
393 if (ret == process_.main_pid) {
394 if (IsActivelyMonitoring()) {
395 SetExitStatusCode(Result::OK, WEXITSTATUS(status));
396 } else {
397 SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_MONITOR);
398 }
399 sandboxee_exited = true;
400 }
401 } else if (WIFSIGNALED(status)) {
402 // This usually does not happen, but might.
403 // Quote from the manual:
404 // A SIGKILL signal may still cause a PTRACE_EVENT_EXIT stop before
405 // actual signal death. This may be changed in the future;
406 VLOG(1) << "PID: " << ret << " terminated with signal: "
407 << util::GetSignalName(WTERMSIG(status));
408 if (ret == process_.main_pid) {
409 if (network_violation_) {
410 SetExitStatusCode(Result::VIOLATION, Result::VIOLATION_NETWORK);
411 result_.SetNetworkViolation(network_proxy_server_->violation_msg_);
412 } else if (external_kill_) {
413 SetExitStatusCode(Result::EXTERNAL_KILL, 0);
414 } else if (timed_out_) {
415 SetExitStatusCode(Result::TIMEOUT, 0);
416 } else {
417 SetExitStatusCode(Result::SIGNALED, WTERMSIG(status));
418 }
419 sandboxee_exited = true;
420 }
421 } else if (WIFSTOPPED(status)) {
422 VLOG(2) << "PID: " << ret
423 << " received signal: " << util::GetSignalName(WSTOPSIG(status))
424 << " with event: "
425 << util::GetPtraceEventName(__WPTRACEEVENT(status));
426 StateProcessStopped(ret, status);
427 } else if (WIFCONTINUED(status)) {
428 VLOG(2) << "PID: " << ret << " is being continued";
429 }
430 }
431
432 if (!sandboxee_exited) {
433 const bool log_stack_traces =
434 result_.final_status() != Result::OK &&
435 absl::GetFlag(FLAGS_sandbox2_log_all_stack_traces);
436 constexpr auto kGracefullExitTimeout = absl::Milliseconds(200);
437 auto deadline = absl::Now() + kGracefullExitTimeout;
438 if (log_stack_traces) {
439 deadline = absl::Now() +
440 absl::GetFlag(FLAGS_sandbox2_stack_traces_collection_timeout);
441 }
442 for (;;) {
443 auto left = deadline - absl::Now();
444 if (absl::Now() >= deadline) {
445 LOG(WARNING)
446 << "Waiting for sandboxee exit timed out. Sandboxee result: "
447 << result_.ToString();
448 break;
449 }
450 pid_t ret = pid_waiter.Wait(&status);
451 if (ret == -1) {
452 if (!log_stack_traces || ret != ECHILD) {
453 PLOG(ERROR) << "waitpid() failed";
454 }
455 break;
456 }
457 if (!log_stack_traces) {
458 if (ret == process_.main_pid &&
459 (WIFSIGNALED(status) || WIFEXITED(status))) {
460 break;
461 }
462 kill(process_.main_pid, SIGKILL);
463 }
464
465 if (ret == 0) {
466 auto ts = absl::ToTimespec(left);
467 sigtimedwait(&sset_, nullptr, &ts);
468 continue;
469 }
470
471 if (WIFSTOPPED(status)) {
472 if (log_stack_traces) {
473 LogStackTraceOfPid(ret);
474 }
475
476 if (__WPTRACEEVENT(status) == PTRACE_EVENT_EXIT) {
477 VLOG(2) << "PID: " << ret << " PTRACE_EVENT_EXIT ";
478 ContinueProcess(ret, 0);
479 continue;
480 }
481 }
482 }
483 }
484 }
485
LogStackTraceOfPid(pid_t pid)486 void PtraceMonitor::LogStackTraceOfPid(pid_t pid) {
487 if (!StackTraceCollectionPossible()) {
488 return;
489 }
490
491 Regs regs(pid);
492 if (auto status = regs.Fetch(); !status.ok()) {
493 LOG(ERROR) << "Failed to get regs, PID:" << pid << " status:" << status;
494 return;
495 }
496
497 if (auto stack_trace = GetAndLogStackTrace(®s); !stack_trace.ok()) {
498 LOG(ERROR) << "Failed to get stack trace, PID:" << pid
499 << " status:" << stack_trace.status();
500 }
501 }
502
InitSetupSignals()503 bool PtraceMonitor::InitSetupSignals() {
504 if (sigemptyset(&sset_) == -1) {
505 PLOG(ERROR) << "sigemptyset()";
506 return false;
507 }
508
509 // sigtimedwait will react (wake-up) to arrival of this signal.
510 if (sigaddset(&sset_, SIGCHLD) == -1) {
511 PLOG(ERROR) << "sigaddset(SIGCHLD)";
512 return false;
513 }
514
515 if (pthread_sigmask(SIG_BLOCK, &sset_, nullptr) == -1) {
516 PLOG(ERROR) << "pthread_sigmask(SIG_BLOCK, SIGCHLD)";
517 return false;
518 }
519
520 return true;
521 }
522
InitPtraceAttach()523 bool PtraceMonitor::InitPtraceAttach() {
524 if (process_.init_pid > 0) {
525 if (ptrace(PTRACE_SEIZE, process_.init_pid, 0, PTRACE_O_EXITKILL) != 0) {
526 if (errno != ESRCH) {
527 PLOG(ERROR) << "attaching to init process failed";
528 }
529 return false;
530 }
531 }
532
533 // Get a list of tasks.
534 absl::flat_hash_set<int> tasks;
535 if (auto task_list = sanitizer::GetListOfTasks(process_.main_pid);
536 task_list.ok()) {
537 tasks = *std::move(task_list);
538 } else {
539 LOG(ERROR) << "Could not get list of tasks: "
540 << task_list.status().message();
541 return false;
542 }
543
544 if (tasks.find(process_.main_pid) == tasks.end()) {
545 LOG(ERROR) << "The pid " << process_.main_pid
546 << " was not found in its own tasklist.";
547 return false;
548 }
549
550 // With TSYNC, we can allow threads: seccomp applies to all threads.
551 if (tasks.size() > 1) {
552 LOG(WARNING) << "PID " << process_.main_pid << " has " << tasks.size()
553 << " threads,"
554 << " at the time of call to SandboxMeHere. If you are seeing"
555 << " more sandbox violations than expected, this might be"
556 << " the reason why"
557 << ".";
558 }
559
560 absl::flat_hash_set<int> tasks_attached;
561 int retries = 0;
562 absl::Time deadline = absl::Now() + absl::Seconds(2);
563
564 // In some situations we allow ptrace to try again when it fails.
565 while (!tasks.empty()) {
566 absl::flat_hash_set<int> tasks_left;
567 for (int task : tasks) {
568 constexpr intptr_t options =
569 PTRACE_O_TRACESYSGOOD | PTRACE_O_TRACEFORK | PTRACE_O_TRACEVFORK |
570 PTRACE_O_TRACEVFORKDONE | PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXEC |
571 PTRACE_O_TRACEEXIT | PTRACE_O_TRACESECCOMP | PTRACE_O_EXITKILL;
572 int ret = ptrace(PTRACE_SEIZE, task, 0, options);
573 if (ret != 0) {
574 if (errno == EPERM) {
575 // Sometimes when a task is exiting we can get an EPERM from ptrace.
576 // Let's try again up until the timeout in this situation.
577 PLOG(WARNING) << "ptrace(PTRACE_SEIZE, " << task << ", "
578 << absl::StrCat("0x", absl::Hex(options))
579 << "), trying again...";
580 tasks_left.insert(task);
581 continue;
582 }
583 if (errno == ESRCH) {
584 // A task may have exited since we captured the task list, we will
585 // allow things to continue after we log a warning.
586 PLOG(WARNING)
587 << "ptrace(PTRACE_SEIZE, " << task << ", "
588 << absl::StrCat("0x", absl::Hex(options))
589 << ") skipping exited task. Continuing with other tasks.";
590 continue;
591 }
592 // Any other errno will be considered a failure.
593 PLOG(ERROR) << "ptrace(PTRACE_SEIZE, " << task << ", "
594 << absl::StrCat("0x", absl::Hex(options)) << ") failed.";
595 return false;
596 }
597 tasks_attached.insert(task);
598 }
599 if (!tasks_left.empty()) {
600 if (absl::Now() < deadline) {
601 LOG(ERROR) << "Attaching to sandboxee timed out: could not attach to "
602 << tasks_left.size() << " tasks";
603 return false;
604 }
605 // Exponential Backoff.
606 constexpr absl::Duration kInitialRetry = absl::Milliseconds(1);
607 constexpr absl::Duration kMaxRetry = absl::Milliseconds(20);
608 const absl::Duration retry_interval =
609 kInitialRetry * (1 << std::min(10, retries++));
610 absl::SleepFor(
611 std::min({retry_interval, kMaxRetry, deadline - absl::Now()}));
612 }
613 tasks = std::move(tasks_left);
614 }
615
616 // Get a list of tasks after attaching.
617 if (auto tasks_list = sanitizer::GetListOfTasks(process_.main_pid);
618 tasks_list.ok()) {
619 tasks = *std::move(tasks_list);
620 } else {
621 LOG(ERROR) << "Could not get list of tasks: "
622 << tasks_list.status().message();
623 return false;
624 }
625
626 // Check that we attached to all the threads
627 if (tasks_attached != tasks) {
628 LOG(ERROR) << "The pid " << process_.main_pid
629 << " spawned new threads while we were trying to attach to it.";
630 return false;
631 }
632
633 // No glibc wrapper for gettid - see 'man gettid'.
634 VLOG(1) << "Monitor (PID: " << getpid()
635 << ", TID: " << util::Syscall(__NR_gettid)
636 << ") attached to PID: " << process_.main_pid;
637
638 // Technically, the sandboxee can be in a ptrace-stopped state right now,
639 // because some signal could have arrived in the meantime. Yet, this
640 // Comms::SendUint32 call shouldn't lock our process, because the underlying
641 // socketpair() channel is buffered, hence it will accept the uint32_t message
642 // no matter what is the current state of the sandboxee, and it will allow for
643 // our process to continue and unlock the sandboxee with the proper ptrace
644 // event handling.
645 if (!comms_->SendUint32(Client::kSandbox2ClientDone)) {
646 LOG(ERROR) << "Couldn't send Client::kSandbox2ClientDone message";
647 return false;
648 }
649 return true;
650 }
651
ActionProcessSyscall(Regs * regs,const Syscall & syscall)652 void PtraceMonitor::ActionProcessSyscall(Regs* regs, const Syscall& syscall) {
653 // If the sandboxing is not enabled yet, allow the first __NR_execveat.
654 if (syscall.nr() == __NR_execveat && !IsActivelyMonitoring()) {
655 VLOG(1) << "[PERMITTED/BEFORE_EXECVEAT]: "
656 << "SYSCALL ::: PID: " << regs->pid() << ", PROG: '"
657 << util::GetProgName(regs->pid())
658 << "' : " << syscall.GetDescription();
659 ContinueProcess(regs->pid(), 0);
660 return;
661 }
662
663 // Notify can decide whether we want to allow this syscall. It could be useful
664 // for sandbox setups in which some syscalls might still need some logging,
665 // but nonetheless be allowed ('permissible syscalls' in sandbox v1).
666 auto trace_response = notify_->EventSyscallTrace(syscall);
667 if (trace_response == Notify::TraceAction::kAllow) {
668 ContinueProcess(regs->pid(), 0);
669 return;
670 }
671 if (trace_response == Notify::TraceAction::kInspectAfterReturn) {
672 // Note that a process might die without an exit-stop before the syscall is
673 // completed (eg. a thread calls execve() and the thread group leader dies),
674 // so the entry is removed when the process exits.
675 syscalls_in_progress_[regs->pid()] = syscall;
676 CompleteSyscall(regs->pid(), 0);
677 return;
678 }
679
680 if (absl::GetFlag(FLAGS_sandbox2_danger_danger_permit_all) || log_file_) {
681 std::string syscall_description = syscall.GetDescription();
682 if (log_file_) {
683 PCHECK(absl::FPrintF(log_file_, "PID: %d %s\n", regs->pid(),
684 syscall_description) >= 0);
685 }
686 VLOG(1) << "PID: " << regs->pid() << " " << syscall_description;
687 ContinueProcess(regs->pid(), 0);
688 return;
689 }
690
691 ActionProcessSyscallViolation(regs, syscall, kSyscallViolation);
692 }
693
ActionProcessSyscallViolation(Regs * regs,const Syscall & syscall,ViolationType violation_type)694 void PtraceMonitor::ActionProcessSyscallViolation(
695 Regs* regs, const Syscall& syscall, ViolationType violation_type) {
696 LogSyscallViolation(syscall);
697 notify_->EventSyscallViolation(syscall, violation_type);
698 SetExitStatusCode(Result::VIOLATION, syscall.nr());
699 result_.SetSyscall(std::make_unique<Syscall>(syscall));
700 SetAdditionalResultInfo(std::make_unique<Regs>(*regs));
701 // Rewrite the syscall argument to something invalid (-1).
702 // The process will be killed anyway so this is just a precaution.
703 auto status = regs->SkipSyscallReturnValue(-ENOSYS);
704 if (!status.ok()) {
705 LOG(ERROR) << status;
706 }
707 }
708
EventPtraceSeccomp(pid_t pid,int event_msg)709 void PtraceMonitor::EventPtraceSeccomp(pid_t pid, int event_msg) {
710 if (event_msg < sapi::cpu::Architecture::kUnknown ||
711 event_msg > sapi::cpu::Architecture::kMax) {
712 // We've observed that, if the process has exited, the event_msg may contain
713 // the exit status even though we haven't received the exit event yet.
714 // To work around this, if the event msg is not in the range of the known
715 // architectures, we assume that it's an exit status. We deal with it by
716 // ignoring this event, and we'll get the exit event in the next iteration.
717 LOG(WARNING) << "received event_msg for unknown architecture: " << event_msg
718 << "; the program may have exited";
719 return;
720 }
721
722 // If the seccomp-policy is using RET_TRACE, we request that it returns the
723 // syscall architecture identifier in the SECCOMP_RET_DATA.
724 const auto syscall_arch = static_cast<sapi::cpu::Architecture>(event_msg);
725 Regs regs(pid);
726 auto status = regs.Fetch();
727 if (!status.ok()) {
728 // Ignore if process is killed in the meanwhile
729 if (absl::IsNotFound(status)) {
730 LOG(WARNING) << "failed to fetch regs: " << status;
731 return;
732 }
733 LOG(ERROR) << "failed to fetch regs: " << status;
734 SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_FETCH);
735 return;
736 }
737
738 Syscall syscall = regs.ToSyscall(syscall_arch);
739 // If the architecture of the syscall used is different that the current host
740 // architecture, report a violation.
741 if (syscall_arch != Syscall::GetHostArch()) {
742 ActionProcessSyscallViolation(®s, syscall, kArchitectureSwitchViolation);
743 return;
744 }
745
746 ActionProcessSyscall(®s, syscall);
747 }
748
EventSyscallExit(pid_t pid)749 void PtraceMonitor::EventSyscallExit(pid_t pid) {
750 // Check that the monitor wants to inspect the current syscall's return value.
751 auto index = syscalls_in_progress_.find(pid);
752 if (index == syscalls_in_progress_.end()) {
753 LOG(ERROR) << "Expected a syscall in progress in PID " << pid;
754 SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_INSPECT);
755 return;
756 }
757 Regs regs(pid);
758 auto status = regs.Fetch();
759 if (!status.ok()) {
760 // Ignore if process is killed in the meanwhile
761 if (absl::IsNotFound(status)) {
762 LOG(WARNING) << "failed to fetch regs: " << status;
763 return;
764 }
765 LOG(ERROR) << "failed to fetch regs: " << status;
766 SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_FETCH);
767 return;
768 }
769 int64_t return_value = regs.GetReturnValue(sapi::host_cpu::Architecture());
770 notify_->EventSyscallReturn(index->second, return_value);
771 syscalls_in_progress_.erase(index);
772 ContinueProcess(pid, 0);
773 }
774
EventPtraceNewProcess(pid_t pid,int event_msg)775 void PtraceMonitor::EventPtraceNewProcess(pid_t pid, int event_msg) {
776 // ptrace doesn't issue syscall-exit-stops for successful fork/vfork/clone
777 // system calls. Check if the monitor wanted to inspect the syscall's return
778 // value, and call EventSyscallReturn for the parent process if so.
779 auto index = syscalls_in_progress_.find(pid);
780 if (index != syscalls_in_progress_.end()) {
781 auto syscall_nr = index->second.nr();
782 bool creating_new_process = syscall_nr == __NR_clone;
783 #ifdef __NR_clone3
784 creating_new_process = creating_new_process || syscall_nr == __NR_clone3;
785 #endif
786 #ifdef __NR_fork
787 creating_new_process = creating_new_process || syscall_nr == __NR_fork;
788 #endif
789 #ifdef __NR_vfork
790 creating_new_process = creating_new_process || syscall_nr == __NR_vfork;
791 #endif
792 if (!creating_new_process) {
793 LOG(ERROR) << "Expected a fork/vfork/clone syscall in progress in PID "
794 << pid << "; actual: " << index->second.GetDescription();
795 SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_INSPECT);
796 return;
797 }
798 notify_->EventSyscallReturn(index->second, event_msg);
799 syscalls_in_progress_.erase(index);
800 }
801 ContinueProcess(pid, 0);
802 }
803
EventPtraceExec(pid_t pid,int event_msg)804 void PtraceMonitor::EventPtraceExec(pid_t pid, int event_msg) {
805 if (!IsActivelyMonitoring()) {
806 VLOG(1) << "PTRACE_EVENT_EXEC seen from PID: " << event_msg
807 << ". SANDBOX ENABLED!";
808 SetActivelyMonitoring();
809 } else {
810 // ptrace doesn't issue syscall-exit-stops for successful execve/execveat
811 // system calls. Check if the monitor wanted to inspect the syscall's return
812 // value, and call EventSyscallReturn if so.
813 auto index = syscalls_in_progress_.find(pid);
814 if (index != syscalls_in_progress_.end()) {
815 auto syscall_nr = index->second.nr();
816 if (syscall_nr != __NR_execve && syscall_nr != __NR_execveat) {
817 LOG(ERROR) << "Expected an execve/execveat syscall in progress in PID "
818 << pid << "; actual: " << index->second.GetDescription();
819 SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_INSPECT);
820 return;
821 }
822 notify_->EventSyscallReturn(index->second, 0);
823 syscalls_in_progress_.erase(index);
824 }
825 }
826 ContinueProcess(pid, 0);
827 }
828
EventPtraceExit(pid_t pid,int event_msg)829 void PtraceMonitor::EventPtraceExit(pid_t pid, int event_msg) {
830 // Forget about any syscalls in progress for this PID.
831 syscalls_in_progress_.erase(pid);
832
833 // A regular exit, let it continue (fast-path).
834 if (ABSL_PREDICT_TRUE(WIFEXITED(event_msg) &&
835 (!policy_->collect_stacktrace_on_exit() ||
836 pid != process_.main_pid))) {
837 ContinueProcess(pid, 0);
838 return;
839 }
840
841 const bool is_seccomp =
842 WIFSIGNALED(event_msg) && WTERMSIG(event_msg) == SIGSYS;
843 const bool log_stack_trace =
844 absl::GetFlag(FLAGS_sandbox2_log_all_stack_traces);
845 // Fetch the registers as we'll need them to fill the result in any case
846 auto regs = std::make_unique<Regs>(pid);
847 if (is_seccomp || pid == process_.main_pid || log_stack_trace) {
848 auto status = regs->Fetch();
849 if (!status.ok()) {
850 LOG(ERROR) << "failed to fetch regs: " << status;
851 SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_FETCH);
852 return;
853 }
854 }
855
856 // Process signaled due to seccomp violation.
857 if (is_seccomp) {
858 VLOG(1) << "PID: " << pid << " violation uncovered via the EXIT_EVENT";
859 ActionProcessSyscallViolation(
860 regs.get(), regs->ToSyscall(Syscall::GetHostArch()), kSyscallViolation);
861 return;
862 }
863
864 // This can be reached in four cases:
865 // 1) Process was killed from the sandbox.
866 // 2) Process was killed because it hit a timeout.
867 // 3) Regular signal/other exit cause.
868 // 4) Normal exit for which we want to obtain stack trace.
869 if (pid == process_.main_pid) {
870 VLOG(1) << "PID: " << pid << " main special exit";
871 if (network_violation_) {
872 SetExitStatusCode(Result::VIOLATION, Result::VIOLATION_NETWORK);
873 result_.SetNetworkViolation(network_proxy_server_->violation_msg_);
874 } else if (external_kill_) {
875 SetExitStatusCode(Result::EXTERNAL_KILL, 0);
876 } else if (timed_out_) {
877 SetExitStatusCode(Result::TIMEOUT, 0);
878 } else if (WIFEXITED(event_msg)) {
879 SetExitStatusCode(Result::OK, WEXITSTATUS(event_msg));
880 } else {
881 SetExitStatusCode(Result::SIGNALED, WTERMSIG(event_msg));
882 }
883 SetAdditionalResultInfo(std::move(regs));
884 } else if (log_stack_trace) {
885 // In case pid == pid_ the stack trace will be logged anyway. So we need
886 // to do explicit logging only when this is not a main PID.
887 if (StackTraceCollectionPossible()) {
888 if (auto stack_trace = GetAndLogStackTrace(regs.get());
889 !stack_trace.ok()) {
890 LOG(ERROR) << "Failed to get stack trace, PID:" << pid
891 << " status:" << stack_trace.status();
892 }
893 }
894 }
895 VLOG(1) << "Continuing";
896 ContinueProcess(pid, 0);
897 }
898
EventPtraceStop(pid_t pid,int stopsig)899 void PtraceMonitor::EventPtraceStop(pid_t pid, int stopsig) {
900 // It's not a real stop signal. For example PTRACE_O_TRACECLONE and similar
901 // flags to ptrace(PTRACE_SEIZE) might generate this event with SIGTRAP.
902 if (stopsig != SIGSTOP && stopsig != SIGTSTP && stopsig != SIGTTIN &&
903 stopsig != SIGTTOU) {
904 ContinueProcess(pid, 0);
905 return;
906 }
907 // It's our PID stop signal. Stop it.
908 VLOG(2) << "PID: " << pid << " stopped due to "
909 << util::GetSignalName(stopsig);
910 StopProcess(pid, 0);
911 }
912
StateProcessStopped(pid_t pid,int status)913 void PtraceMonitor::StateProcessStopped(pid_t pid, int status) {
914 int stopsig = WSTOPSIG(status);
915 // We use PTRACE_O_TRACESYSGOOD, so we can tell it's a syscall stop without
916 // calling PTRACE_GETSIGINFO by checking the value of the reported signal.
917 bool is_syscall_exit = stopsig == (SIGTRAP | 0x80);
918 if (__WPTRACEEVENT(status) == 0 && !is_syscall_exit) {
919 // Must be a regular signal delivery.
920 VLOG(2) << "PID: " << pid
921 << " received signal: " << util::GetSignalName(stopsig);
922 notify_->EventSignal(pid, stopsig);
923 ContinueProcess(pid, stopsig);
924 return;
925 }
926
927 unsigned long event_msg; // NOLINT
928 if (ptrace(PTRACE_GETEVENTMSG, pid, 0, &event_msg) == -1) {
929 if (errno == ESRCH) {
930 // This happens from time to time, the kernel does not guarantee us that
931 // we get the event in time.
932 PLOG(INFO) << "ptrace(PTRACE_GETEVENTMSG, " << pid << ")";
933 return;
934 }
935 PLOG(ERROR) << "ptrace(PTRACE_GETEVENTMSG, " << pid << ")";
936 SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_GETEVENT);
937 return;
938 }
939
940 if (ABSL_PREDICT_FALSE(pid == process_.main_pid && should_dump_stack_ &&
941 executor_->libunwind_sbox_for_pid_ == 0 &&
942 policy_->GetNamespace())) {
943 auto stack_trace = [this,
944 pid]() -> absl::StatusOr<std::vector<std::string>> {
945 Regs regs(pid);
946 SAPI_RETURN_IF_ERROR(regs.Fetch());
947 return GetStackTrace(®s);
948 }();
949
950 if (!stack_trace.ok()) {
951 LOG(WARNING) << "FAILED TO GET SANDBOX STACK : " << stack_trace.status();
952 } else if (SAPI_VLOG_IS_ON(0)) {
953 VLOG(0) << "SANDBOX STACK: PID: " << pid << ", [";
954 for (const auto& frame : *stack_trace) {
955 VLOG(0) << " " << frame;
956 }
957 VLOG(0) << "]";
958 }
959 should_dump_stack_ = false;
960 }
961
962 #ifndef PTRACE_EVENT_STOP
963 #define PTRACE_EVENT_STOP 128
964 #endif
965
966 if (is_syscall_exit) {
967 VLOG(2) << "PID: " << pid << " syscall-exit-stop: " << event_msg;
968 EventSyscallExit(pid);
969 return;
970 }
971
972 switch (__WPTRACEEVENT(status)) {
973 case PTRACE_EVENT_FORK:
974 VLOG(2) << "PID: " << pid << " PTRACE_EVENT_FORK, PID: " << event_msg;
975 EventPtraceNewProcess(pid, event_msg);
976 break;
977 case PTRACE_EVENT_VFORK:
978 VLOG(2) << "PID: " << pid << " PTRACE_EVENT_VFORK, PID: " << event_msg;
979 EventPtraceNewProcess(pid, event_msg);
980 break;
981 case PTRACE_EVENT_CLONE:
982 VLOG(2) << "PID: " << pid << " PTRACE_EVENT_CLONE, PID: " << event_msg;
983 EventPtraceNewProcess(pid, event_msg);
984 break;
985 case PTRACE_EVENT_VFORK_DONE:
986 ContinueProcess(pid, 0);
987 break;
988 case PTRACE_EVENT_EXEC:
989 VLOG(2) << "PID: " << pid << " PTRACE_EVENT_EXEC, PID: " << event_msg;
990 EventPtraceExec(pid, event_msg);
991 break;
992 case PTRACE_EVENT_EXIT:
993 VLOG(2) << "PID: " << pid << " PTRACE_EVENT_EXIT: " << event_msg;
994 EventPtraceExit(pid, event_msg);
995 break;
996 case PTRACE_EVENT_STOP:
997 VLOG(2) << "PID: " << pid << " PTRACE_EVENT_STOP: " << event_msg;
998 EventPtraceStop(pid, stopsig);
999 break;
1000 case PTRACE_EVENT_SECCOMP:
1001 VLOG(2) << "PID: " << pid << " PTRACE_EVENT_SECCOMP: " << event_msg;
1002 EventPtraceSeccomp(pid, event_msg);
1003 break;
1004 default:
1005 LOG(ERROR) << "Unknown ptrace event: " << __WPTRACEEVENT(status)
1006 << " with data: " << event_msg;
1007 break;
1008 }
1009 }
1010
1011 } // namespace sandbox2
1012