xref: /aosp_15_r20/external/sandboxed-api/sandboxed_api/sandbox2/monitor_ptrace.cc (revision ec63e07ab9515d95e79c211197c445ef84cefa6a)
1 // Copyright 2023 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // Implementation file for the sandbox2::PtraceMonitor class.
16 
17 #include "sandboxed_api/sandbox2/monitor_ptrace.h"
18 
19 #include <sys/ptrace.h>
20 #include <sys/resource.h>
21 #include <sys/wait.h>
22 #include <syscall.h>
23 #include <unistd.h>
24 
25 #include <algorithm>
26 #include <atomic>
27 #include <cerrno>
28 #include <cstdint>
29 #include <ctime>
30 #include <deque>
31 #include <fstream>
32 #include <ios>
33 #include <memory>
34 #include <sstream>
35 #include <string>
36 #include <utility>
37 #include <vector>
38 
39 #include "absl/base/optimization.h"
40 #include "absl/cleanup/cleanup.h"
41 #include "absl/container/flat_hash_map.h"
42 #include "absl/container/flat_hash_set.h"
43 #include "absl/flags/declare.h"
44 #include "absl/flags/flag.h"
45 #include "absl/log/check.h"
46 #include "absl/log/log.h"
47 #include "absl/status/status.h"
48 #include "absl/status/statusor.h"
49 #include "absl/strings/str_cat.h"
50 #include "absl/strings/str_format.h"
51 #include "absl/strings/string_view.h"
52 #include "absl/synchronization/mutex.h"
53 #include "absl/synchronization/notification.h"
54 #include "absl/time/clock.h"
55 #include "absl/time/time.h"
56 #include "sandboxed_api/config.h"
57 #include "sandboxed_api/sandbox2/client.h"
58 #include "sandboxed_api/sandbox2/comms.h"
59 #include "sandboxed_api/sandbox2/executor.h"
60 #include "sandboxed_api/sandbox2/notify.h"
61 #include "sandboxed_api/sandbox2/policy.h"
62 #include "sandboxed_api/sandbox2/regs.h"
63 #include "sandboxed_api/sandbox2/result.h"
64 #include "sandboxed_api/sandbox2/sanitizer.h"
65 #include "sandboxed_api/sandbox2/syscall.h"
66 #include "sandboxed_api/sandbox2/util.h"
67 #include "sandboxed_api/util/raw_logging.h"
68 #include "sandboxed_api/util/status_macros.h"
69 
// When set, PtraceMonitor::Run() logs the stack trace of every traced
// thread/process that reports a stop event while the monitor is draining
// events at shutdown after a non-OK result.
ABSL_FLAG(bool, sandbox2_log_all_stack_traces, false,
          "If set, sandbox2 monitor will log stack traces of all monitored "
          "threads/processes that are reported to terminate with a signal.");

// Replaces the default shutdown drain timeout in PtraceMonitor::Run() when
// sandbox2_log_all_stack_traces is in effect.
ABSL_FLAG(absl::Duration, sandbox2_stack_traces_collection_timeout,
          absl::Seconds(1),
          "How much time should be spent on logging threads' stack traces on "
          "monitor shut down. Only relevent when collection of all stack "
          "traces is enabled.");

// Declared here only; read by PtraceMonitor::ActionProcessSyscall() to decide
// whether a syscall that was not explicitly allowed is let through.
ABSL_DECLARE_FLAG(bool, sandbox2_danger_danger_permit_all);
81 
82 namespace sandbox2 {
83 namespace {
84 
// waitpid() favours newer threads, so a newer thread that raises many events
// could starve older ones. PidWaiter counters this by collecting the pending
// wait statuses into a queue and handing them out one per Wait() call, so that
// every waiting thread eventually gets its turn.
class PidWaiter {
 public:
  // Creates a waiter that polls `priority_pid` ahead of all other children.
  explicit PidWaiter(pid_t priority_pid) : priority_pid_(priority_pid) {}

  // Hands out the next queued event.
  //
  // Returns the PID the event belongs to and stores the matching waitpid()
  // status in `*status`. Returns 0 if nothing needs attention right now.
  // Returns -1 if waitpid() failed, in which case `errno` holds the recorded
  // error code (each recorded error is reported once).
  int Wait(int* status) {
    RefillStatuses();

    if (!statuses_.empty()) {
      // Copy the entry out before popping it off the queue.
      const std::pair<pid_t, int> next = statuses_.front();
      statuses_.pop_front();
      *status = next.second;
      return next.first;
    }

    if (last_errno_ != 0) {
      errno = last_errno_;
      last_errno_ = 0;
      return -1;
    }
    return 0;
  }

 private:
  // Polls `pid` once. WNOHANG keeps the call non-blocking so it returns
  // quickly when there is nothing to process.
  //
  // Returns false if no event was pending; returns true if an event was queued
  // or an error was recorded in `last_errno_`.
  bool CheckStatus(pid_t pid) {
    int wait_status;
    const pid_t waited =
        waitpid(pid, &wait_status, __WNOTHREAD | __WALL | WUNTRACED | WNOHANG);
    if (waited == 0) {
      return false;
    }
    if (waited > 0) {
      statuses_.emplace_back(waited, wait_status);
    } else {
      last_errno_ = errno;
    }
    return true;
  }

  // Fills the queue if it is empty. The priority PID is polled every
  // kPriorityCheckPeriod iterations; a hit on it ends the refill immediately.
  // Otherwise any child is polled until nothing is pending, an error is
  // recorded, or kMaxIterations polls have been made.
  void RefillStatuses() {
    constexpr int kMaxIterations = 1000;
    constexpr int kPriorityCheckPeriod = 100;
    if (!statuses_.empty()) {
      return;
    }
    int iteration = 0;
    while (last_errno_ == 0 && iteration < kMaxIterations) {
      const bool priority_turn = (iteration % kPriorityCheckPeriod) == 0;
      if (priority_turn && CheckStatus(priority_pid_)) {
        return;
      }
      if (!CheckStatus(-1)) {
        return;
      }
      ++iteration;
    }
  }

  pid_t priority_pid_;
  std::deque<std::pair<pid_t, int>> statuses_ = {};
  int last_errno_ = 0;
};
155 
156 // We could use the ProcMapsIterator, however we want the full file content.
ReadProcMaps(pid_t pid)157 std::string ReadProcMaps(pid_t pid) {
158   std::ifstream input(absl::StrCat("/proc/", pid, "/maps"),
159                       std::ios_base::in | std::ios_base::binary);
160   std::ostringstream contents;
161   contents << input.rdbuf();
162   return contents.str();
163 }
164 
ContinueProcess(pid_t pid,int signo)165 void ContinueProcess(pid_t pid, int signo) {
166   if (ptrace(PTRACE_CONT, pid, 0, signo) == -1) {
167     if (errno == ESRCH) {
168       LOG(WARNING) << "Process " << pid
169                    << " died while trying to PTRACE_CONT it";
170     } else {
171       PLOG(ERROR) << "ptrace(PTRACE_CONT, pid=" << pid << ", sig=" << signo
172                   << ")";
173     }
174   }
175 }
176 
StopProcess(pid_t pid,int signo)177 void StopProcess(pid_t pid, int signo) {
178   if (ptrace(PTRACE_LISTEN, pid, 0, signo) == -1) {
179     if (errno == ESRCH) {
180       LOG(WARNING) << "Process " << pid
181                    << " died while trying to PTRACE_LISTEN it";
182     } else {
183       PLOG(ERROR) << "ptrace(PTRACE_LISTEN, pid=" << pid << ", sig=" << signo
184                   << ")";
185     }
186   }
187 }
188 
CompleteSyscall(pid_t pid,int signo)189 void CompleteSyscall(pid_t pid, int signo) {
190   if (ptrace(PTRACE_SYSCALL, pid, 0, signo) == -1) {
191     if (errno == ESRCH) {
192       LOG(WARNING) << "Process " << pid
193                    << " died while trying to PTRACE_SYSCALL it";
194     } else {
195       PLOG(ERROR) << "ptrace(PTRACE_SYSCALL, pid=" << pid << ", sig=" << signo
196                   << ")";
197     }
198   }
199 }
200 
201 }  // namespace
202 
PtraceMonitor(Executor * executor,Policy * policy,Notify * notify)203 PtraceMonitor::PtraceMonitor(Executor* executor, Policy* policy, Notify* notify)
204     : MonitorBase(executor, policy, notify),
205       wait_for_execve_(executor->enable_sandboxing_pre_execve_) {
206   if (executor_->limits()->wall_time_limit() != absl::ZeroDuration()) {
207     auto deadline = absl::Now() + executor_->limits()->wall_time_limit();
208     deadline_millis_.store(absl::ToUnixMillis(deadline),
209                            std::memory_order_relaxed);
210   }
211   external_kill_request_flag_.test_and_set(std::memory_order_relaxed);
212   dump_stack_request_flag_.test_and_set(std::memory_order_relaxed);
213 }
214 
IsActivelyMonitoring()215 bool PtraceMonitor::IsActivelyMonitoring() {
216   // If we're still waiting for execve(), then we allow all syscalls.
217   return !wait_for_execve_;
218 }
219 
SetActivelyMonitoring()220 void PtraceMonitor::SetActivelyMonitoring() { wait_for_execve_ = false; }
221 
SetAdditionalResultInfo(std::unique_ptr<Regs> regs)222 void PtraceMonitor::SetAdditionalResultInfo(std::unique_ptr<Regs> regs) {
223   pid_t pid = regs->pid();
224   result_.SetRegs(std::move(regs));
225   result_.SetProgName(util::GetProgName(pid));
226   result_.SetProcMaps(ReadProcMaps(pid));
227   if (!ShouldCollectStackTrace(result_.final_status())) {
228     VLOG(1) << "Stack traces have been disabled";
229     return;
230   }
231 
232   absl::StatusOr<std::vector<std::string>> stack_trace =
233       GetAndLogStackTrace(result_.GetRegs());
234   if (!stack_trace.ok()) {
235     LOG(ERROR) << "Could not obtain stack trace: " << stack_trace.status();
236     return;
237   }
238   result_.set_stack_trace(*stack_trace);
239 }
240 
KillSandboxee()241 bool PtraceMonitor::KillSandboxee() {
242   VLOG(1) << "Sending SIGKILL to the PID: " << process_.main_pid;
243   if (kill(process_.main_pid, SIGKILL) != 0) {
244     PLOG(ERROR) << "Could not send SIGKILL to PID " << process_.main_pid;
245     SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_KILL);
246     return false;
247   }
248   constexpr absl::Duration kGracefullKillTimeout = absl::Milliseconds(1000);
249   if (hard_deadline_ == absl::InfiniteFuture()) {
250     hard_deadline_ = absl::Now() + kGracefullKillTimeout;
251   }
252   return true;
253 }
254 
InterruptSandboxee()255 bool PtraceMonitor::InterruptSandboxee() {
256   if (ptrace(PTRACE_INTERRUPT, process_.main_pid, 0, 0) == -1) {
257     PLOG(ERROR) << "Could not send interrupt to pid=" << process_.main_pid;
258     SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_INTERRUPT);
259     return false;
260   }
261   return true;
262 }
263 
// Not defined in glibc.
// Extracts bits 16-23 of a waitpid() status; this file compares the result
// against PTRACE_EVENT_* values. The argument is parenthesized so that
// compound expressions (e.g. `a | b`) are masked as a whole.
#define __WPTRACEEVENT(x) (((x) & 0xff0000) >> 16)
266 
NotifyMonitor()267 void PtraceMonitor::NotifyMonitor() {
268   absl::ReaderMutexLock lock(&notify_mutex_);
269   if (thread_ != nullptr) {
270     pthread_kill(thread_->native_handle(), SIGCHLD);
271   }
272 }
273 
Join()274 void PtraceMonitor::Join() {
275   absl::MutexLock lock(&notify_mutex_);
276   if (thread_) {
277     thread_->join();
278     CHECK(IsDone()) << "Monitor did not terminate";
279     VLOG(1) << "Final execution status: " << result_.ToString();
280     CHECK(result_.final_status() != Result::UNSET);
281     thread_.reset();
282   }
283 }
284 
RunInternal()285 void PtraceMonitor::RunInternal() {
286   thread_ = std::make_unique<std::thread>(&PtraceMonitor::Run, this);
287 
288   // Wait for the Monitor to set-up the sandboxee correctly (or fail while
289   // doing that). From here on, it is safe to use the IPC object for
290   // non-sandbox-related data exchange.
291   setup_notification_.WaitForNotification();
292 }
293 
Run()294 void PtraceMonitor::Run() {
295   absl::Cleanup monitor_done = [this] {
296     getrusage(RUSAGE_THREAD, result_.GetRUsageMonitor());
297     OnDone();
298   };
299 
300   absl::Cleanup setup_notify = [this] { setup_notification_.Notify(); };
301   // It'd be costly to initialize the sigset_t for each sigtimedwait()
302   // invocation, so do it once per Monitor.
303   if (!InitSetupSignals()) {
304     SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_SIGNALS);
305     return;
306   }
307   // This call should be the last in the init sequence, because it can cause the
308   // sandboxee to enter ptrace-stopped state, in which it will not be able to
309   // send any messages over the Comms channel.
310   if (!InitPtraceAttach()) {
311     SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_PTRACE);
312     return;
313   }
314 
315   // Tell the parent thread (Sandbox2 object) that we're done with the initial
316   // set-up process of the sandboxee.
317   std::move(setup_notify).Invoke();
318 
319   bool sandboxee_exited = false;
320   PidWaiter pid_waiter(process_.main_pid);
321   int status;
322   // All possible still running children of main process, will be killed due to
323   // PTRACE_O_EXITKILL ptrace() flag.
324   while (result().final_status() == Result::UNSET) {
325     if (absl::Now() >= hard_deadline_) {
326       LOG(WARNING) << "Hard deadline exceeded (timed_out=" << timed_out_
327                    << ", external_kill=" << external_kill_
328                    << ", network_violation=" << network_violation_ << ").";
329       SetExitStatusCode(Result::TIMEOUT, 0);
330       break;
331     }
332     int64_t deadline = deadline_millis_.load(std::memory_order_relaxed);
333     if (deadline != 0 && absl::Now() >= absl::FromUnixMillis(deadline)) {
334       VLOG(1) << "Sandbox process hit timeout due to the walltime timer";
335       timed_out_ = true;
336       if (!KillSandboxee()) {
337         break;
338       }
339     }
340 
341     if (!dump_stack_request_flag_.test_and_set(std::memory_order_relaxed)) {
342       should_dump_stack_ = true;
343       if (!InterruptSandboxee()) {
344         break;
345       }
346     }
347 
348     if (!external_kill_request_flag_.test_and_set(std::memory_order_relaxed)) {
349       external_kill_ = true;
350       if (!KillSandboxee()) {
351         break;
352       }
353     }
354 
355     if (network_proxy_server_ &&
356         network_proxy_server_->violation_occurred_.load(
357             std::memory_order_acquire) &&
358         !network_violation_) {
359       network_violation_ = true;
360       if (!KillSandboxee()) {
361         break;
362       }
363     }
364 
365     pid_t ret = pid_waiter.Wait(&status);
366     if (ret == 0) {
367       constexpr timespec ts = {kWakeUpPeriodSec, kWakeUpPeriodNSec};
368       int signo = sigtimedwait(&sset_, nullptr, &ts);
369       LOG_IF(ERROR, signo != -1 && signo != SIGCHLD)
370           << "Unknown signal received: " << signo;
371       continue;
372     }
373 
374     if (ret == -1) {
375       if (errno == ECHILD) {
376         LOG(ERROR) << "PANIC(). The main process has not exited yet, "
377                    << "yet we haven't seen its exit event";
378         SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_CHILD);
379       } else {
380         PLOG(ERROR) << "waitpid() failed";
381       }
382       continue;
383     }
384 
385     VLOG(3) << "waitpid() returned with PID: " << ret << ", status: " << status;
386 
387     if (WIFEXITED(status)) {
388       VLOG(1) << "PID: " << ret
389               << " finished with code: " << WEXITSTATUS(status);
390       // That's the main process, set the exit code, and exit. It will kill
391       // all remaining processes (if there are any) because of the
392       // PTRACE_O_EXITKILL ptrace() flag.
393       if (ret == process_.main_pid) {
394         if (IsActivelyMonitoring()) {
395           SetExitStatusCode(Result::OK, WEXITSTATUS(status));
396         } else {
397           SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_MONITOR);
398         }
399         sandboxee_exited = true;
400       }
401     } else if (WIFSIGNALED(status)) {
402       //  This usually does not happen, but might.
403       //  Quote from the manual:
404       //   A SIGKILL signal may still cause a PTRACE_EVENT_EXIT stop before
405       //   actual signal death.  This may be changed in the future;
406       VLOG(1) << "PID: " << ret << " terminated with signal: "
407               << util::GetSignalName(WTERMSIG(status));
408       if (ret == process_.main_pid) {
409         if (network_violation_) {
410           SetExitStatusCode(Result::VIOLATION, Result::VIOLATION_NETWORK);
411           result_.SetNetworkViolation(network_proxy_server_->violation_msg_);
412         } else if (external_kill_) {
413           SetExitStatusCode(Result::EXTERNAL_KILL, 0);
414         } else if (timed_out_) {
415           SetExitStatusCode(Result::TIMEOUT, 0);
416         } else {
417           SetExitStatusCode(Result::SIGNALED, WTERMSIG(status));
418         }
419         sandboxee_exited = true;
420       }
421     } else if (WIFSTOPPED(status)) {
422       VLOG(2) << "PID: " << ret
423               << " received signal: " << util::GetSignalName(WSTOPSIG(status))
424               << " with event: "
425               << util::GetPtraceEventName(__WPTRACEEVENT(status));
426       StateProcessStopped(ret, status);
427     } else if (WIFCONTINUED(status)) {
428       VLOG(2) << "PID: " << ret << " is being continued";
429     }
430   }
431 
432   if (!sandboxee_exited) {
433     const bool log_stack_traces =
434         result_.final_status() != Result::OK &&
435         absl::GetFlag(FLAGS_sandbox2_log_all_stack_traces);
436     constexpr auto kGracefullExitTimeout = absl::Milliseconds(200);
437     auto deadline = absl::Now() + kGracefullExitTimeout;
438     if (log_stack_traces) {
439       deadline = absl::Now() +
440                  absl::GetFlag(FLAGS_sandbox2_stack_traces_collection_timeout);
441     }
442     for (;;) {
443       auto left = deadline - absl::Now();
444       if (absl::Now() >= deadline) {
445         LOG(WARNING)
446             << "Waiting for sandboxee exit timed out. Sandboxee result: "
447             << result_.ToString();
448         break;
449       }
450       pid_t ret = pid_waiter.Wait(&status);
451       if (ret == -1) {
452         if (!log_stack_traces || ret != ECHILD) {
453           PLOG(ERROR) << "waitpid() failed";
454         }
455         break;
456       }
457       if (!log_stack_traces) {
458         if (ret == process_.main_pid &&
459             (WIFSIGNALED(status) || WIFEXITED(status))) {
460           break;
461         }
462         kill(process_.main_pid, SIGKILL);
463       }
464 
465       if (ret == 0) {
466         auto ts = absl::ToTimespec(left);
467         sigtimedwait(&sset_, nullptr, &ts);
468         continue;
469       }
470 
471       if (WIFSTOPPED(status)) {
472         if (log_stack_traces) {
473           LogStackTraceOfPid(ret);
474         }
475 
476         if (__WPTRACEEVENT(status) == PTRACE_EVENT_EXIT) {
477           VLOG(2) << "PID: " << ret << " PTRACE_EVENT_EXIT ";
478           ContinueProcess(ret, 0);
479           continue;
480         }
481       }
482     }
483   }
484 }
485 
LogStackTraceOfPid(pid_t pid)486 void PtraceMonitor::LogStackTraceOfPid(pid_t pid) {
487   if (!StackTraceCollectionPossible()) {
488     return;
489   }
490 
491   Regs regs(pid);
492   if (auto status = regs.Fetch(); !status.ok()) {
493     LOG(ERROR) << "Failed to get regs, PID:" << pid << " status:" << status;
494     return;
495   }
496 
497   if (auto stack_trace = GetAndLogStackTrace(&regs); !stack_trace.ok()) {
498     LOG(ERROR) << "Failed to get stack trace, PID:" << pid
499                << " status:" << stack_trace.status();
500   }
501 }
502 
InitSetupSignals()503 bool PtraceMonitor::InitSetupSignals() {
504   if (sigemptyset(&sset_) == -1) {
505     PLOG(ERROR) << "sigemptyset()";
506     return false;
507   }
508 
509   // sigtimedwait will react (wake-up) to arrival of this signal.
510   if (sigaddset(&sset_, SIGCHLD) == -1) {
511     PLOG(ERROR) << "sigaddset(SIGCHLD)";
512     return false;
513   }
514 
515   if (pthread_sigmask(SIG_BLOCK, &sset_, nullptr) == -1) {
516     PLOG(ERROR) << "pthread_sigmask(SIG_BLOCK, SIGCHLD)";
517     return false;
518   }
519 
520   return true;
521 }
522 
InitPtraceAttach()523 bool PtraceMonitor::InitPtraceAttach() {
524   if (process_.init_pid > 0) {
525     if (ptrace(PTRACE_SEIZE, process_.init_pid, 0, PTRACE_O_EXITKILL) != 0) {
526       if (errno != ESRCH) {
527         PLOG(ERROR) << "attaching to init process failed";
528       }
529       return false;
530     }
531   }
532 
533   // Get a list of tasks.
534   absl::flat_hash_set<int> tasks;
535   if (auto task_list = sanitizer::GetListOfTasks(process_.main_pid);
536       task_list.ok()) {
537     tasks = *std::move(task_list);
538   } else {
539     LOG(ERROR) << "Could not get list of tasks: "
540                << task_list.status().message();
541     return false;
542   }
543 
544   if (tasks.find(process_.main_pid) == tasks.end()) {
545     LOG(ERROR) << "The pid " << process_.main_pid
546                << " was not found in its own tasklist.";
547     return false;
548   }
549 
550   // With TSYNC, we can allow threads: seccomp applies to all threads.
551   if (tasks.size() > 1) {
552     LOG(WARNING) << "PID " << process_.main_pid << " has " << tasks.size()
553                  << " threads,"
554                  << " at the time of call to SandboxMeHere. If you are seeing"
555                  << " more sandbox violations than expected, this might be"
556                  << " the reason why"
557                  << ".";
558   }
559 
560   absl::flat_hash_set<int> tasks_attached;
561   int retries = 0;
562   absl::Time deadline = absl::Now() + absl::Seconds(2);
563 
564   // In some situations we allow ptrace to try again when it fails.
565   while (!tasks.empty()) {
566     absl::flat_hash_set<int> tasks_left;
567     for (int task : tasks) {
568       constexpr intptr_t options =
569           PTRACE_O_TRACESYSGOOD | PTRACE_O_TRACEFORK | PTRACE_O_TRACEVFORK |
570           PTRACE_O_TRACEVFORKDONE | PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXEC |
571           PTRACE_O_TRACEEXIT | PTRACE_O_TRACESECCOMP | PTRACE_O_EXITKILL;
572       int ret = ptrace(PTRACE_SEIZE, task, 0, options);
573       if (ret != 0) {
574         if (errno == EPERM) {
575           // Sometimes when a task is exiting we can get an EPERM from ptrace.
576           // Let's try again up until the timeout in this situation.
577           PLOG(WARNING) << "ptrace(PTRACE_SEIZE, " << task << ", "
578                         << absl::StrCat("0x", absl::Hex(options))
579                         << "), trying again...";
580           tasks_left.insert(task);
581           continue;
582         }
583         if (errno == ESRCH) {
584           // A task may have exited since we captured the task list, we will
585           // allow things to continue after we log a warning.
586           PLOG(WARNING)
587               << "ptrace(PTRACE_SEIZE, " << task << ", "
588               << absl::StrCat("0x", absl::Hex(options))
589               << ") skipping exited task. Continuing with other tasks.";
590           continue;
591         }
592         // Any other errno will be considered a failure.
593         PLOG(ERROR) << "ptrace(PTRACE_SEIZE, " << task << ", "
594                     << absl::StrCat("0x", absl::Hex(options)) << ") failed.";
595         return false;
596       }
597       tasks_attached.insert(task);
598     }
599     if (!tasks_left.empty()) {
600       if (absl::Now() < deadline) {
601         LOG(ERROR) << "Attaching to sandboxee timed out: could not attach to "
602                    << tasks_left.size() << " tasks";
603         return false;
604       }
605       // Exponential Backoff.
606       constexpr absl::Duration kInitialRetry = absl::Milliseconds(1);
607       constexpr absl::Duration kMaxRetry = absl::Milliseconds(20);
608       const absl::Duration retry_interval =
609           kInitialRetry * (1 << std::min(10, retries++));
610       absl::SleepFor(
611           std::min({retry_interval, kMaxRetry, deadline - absl::Now()}));
612     }
613     tasks = std::move(tasks_left);
614   }
615 
616   // Get a list of tasks after attaching.
617   if (auto tasks_list = sanitizer::GetListOfTasks(process_.main_pid);
618       tasks_list.ok()) {
619     tasks = *std::move(tasks_list);
620   } else {
621     LOG(ERROR) << "Could not get list of tasks: "
622                << tasks_list.status().message();
623     return false;
624   }
625 
626   // Check that we attached to all the threads
627   if (tasks_attached != tasks) {
628     LOG(ERROR) << "The pid " << process_.main_pid
629                << " spawned new threads while we were trying to attach to it.";
630     return false;
631   }
632 
633   // No glibc wrapper for gettid - see 'man gettid'.
634   VLOG(1) << "Monitor (PID: " << getpid()
635           << ", TID: " << util::Syscall(__NR_gettid)
636           << ") attached to PID: " << process_.main_pid;
637 
638   // Technically, the sandboxee can be in a ptrace-stopped state right now,
639   // because some signal could have arrived in the meantime. Yet, this
640   // Comms::SendUint32 call shouldn't lock our process, because the underlying
641   // socketpair() channel is buffered, hence it will accept the uint32_t message
642   // no matter what is the current state of the sandboxee, and it will allow for
643   // our process to continue and unlock the sandboxee with the proper ptrace
644   // event handling.
645   if (!comms_->SendUint32(Client::kSandbox2ClientDone)) {
646     LOG(ERROR) << "Couldn't send Client::kSandbox2ClientDone message";
647     return false;
648   }
649   return true;
650 }
651 
ActionProcessSyscall(Regs * regs,const Syscall & syscall)652 void PtraceMonitor::ActionProcessSyscall(Regs* regs, const Syscall& syscall) {
653   // If the sandboxing is not enabled yet, allow the first __NR_execveat.
654   if (syscall.nr() == __NR_execveat && !IsActivelyMonitoring()) {
655     VLOG(1) << "[PERMITTED/BEFORE_EXECVEAT]: "
656             << "SYSCALL ::: PID: " << regs->pid() << ", PROG: '"
657             << util::GetProgName(regs->pid())
658             << "' : " << syscall.GetDescription();
659     ContinueProcess(regs->pid(), 0);
660     return;
661   }
662 
663   // Notify can decide whether we want to allow this syscall. It could be useful
664   // for sandbox setups in which some syscalls might still need some logging,
665   // but nonetheless be allowed ('permissible syscalls' in sandbox v1).
666   auto trace_response = notify_->EventSyscallTrace(syscall);
667   if (trace_response == Notify::TraceAction::kAllow) {
668     ContinueProcess(regs->pid(), 0);
669     return;
670   }
671   if (trace_response == Notify::TraceAction::kInspectAfterReturn) {
672     // Note that a process might die without an exit-stop before the syscall is
673     // completed (eg. a thread calls execve() and the thread group leader dies),
674     // so the entry is removed when the process exits.
675     syscalls_in_progress_[regs->pid()] = syscall;
676     CompleteSyscall(regs->pid(), 0);
677     return;
678   }
679 
680   if (absl::GetFlag(FLAGS_sandbox2_danger_danger_permit_all) || log_file_) {
681     std::string syscall_description = syscall.GetDescription();
682     if (log_file_) {
683       PCHECK(absl::FPrintF(log_file_, "PID: %d %s\n", regs->pid(),
684                            syscall_description) >= 0);
685     }
686     VLOG(1) << "PID: " << regs->pid() << " " << syscall_description;
687     ContinueProcess(regs->pid(), 0);
688     return;
689   }
690 
691   ActionProcessSyscallViolation(regs, syscall, kSyscallViolation);
692 }
693 
ActionProcessSyscallViolation(Regs * regs,const Syscall & syscall,ViolationType violation_type)694 void PtraceMonitor::ActionProcessSyscallViolation(
695     Regs* regs, const Syscall& syscall, ViolationType violation_type) {
696   LogSyscallViolation(syscall);
697   notify_->EventSyscallViolation(syscall, violation_type);
698   SetExitStatusCode(Result::VIOLATION, syscall.nr());
699   result_.SetSyscall(std::make_unique<Syscall>(syscall));
700   SetAdditionalResultInfo(std::make_unique<Regs>(*regs));
701   // Rewrite the syscall argument to something invalid (-1).
702   // The process will be killed anyway so this is just a precaution.
703   auto status = regs->SkipSyscallReturnValue(-ENOSYS);
704   if (!status.ok()) {
705     LOG(ERROR) << status;
706   }
707 }
708 
EventPtraceSeccomp(pid_t pid,int event_msg)709 void PtraceMonitor::EventPtraceSeccomp(pid_t pid, int event_msg) {
710   if (event_msg < sapi::cpu::Architecture::kUnknown ||
711       event_msg > sapi::cpu::Architecture::kMax) {
712     // We've observed that, if the process has exited, the event_msg may contain
713     // the exit status even though we haven't received the exit event yet.
714     // To work around this, if the event msg is not in the range of the known
715     // architectures, we assume that it's an exit status. We deal with it by
716     // ignoring this event, and we'll get the exit event in the next iteration.
717     LOG(WARNING) << "received event_msg for unknown architecture: " << event_msg
718                  << "; the program may have exited";
719     return;
720   }
721 
722   // If the seccomp-policy is using RET_TRACE, we request that it returns the
723   // syscall architecture identifier in the SECCOMP_RET_DATA.
724   const auto syscall_arch = static_cast<sapi::cpu::Architecture>(event_msg);
725   Regs regs(pid);
726   auto status = regs.Fetch();
727   if (!status.ok()) {
728     // Ignore if process is killed in the meanwhile
729     if (absl::IsNotFound(status)) {
730       LOG(WARNING) << "failed to fetch regs: " << status;
731       return;
732     }
733     LOG(ERROR) << "failed to fetch regs: " << status;
734     SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_FETCH);
735     return;
736   }
737 
738   Syscall syscall = regs.ToSyscall(syscall_arch);
739   // If the architecture of the syscall used is different that the current host
740   // architecture, report a violation.
741   if (syscall_arch != Syscall::GetHostArch()) {
742     ActionProcessSyscallViolation(&regs, syscall, kArchitectureSwitchViolation);
743     return;
744   }
745 
746   ActionProcessSyscall(&regs, syscall);
747 }
748 
EventSyscallExit(pid_t pid)749 void PtraceMonitor::EventSyscallExit(pid_t pid) {
750   // Check that the monitor wants to inspect the current syscall's return value.
751   auto index = syscalls_in_progress_.find(pid);
752   if (index == syscalls_in_progress_.end()) {
753     LOG(ERROR) << "Expected a syscall in progress in PID " << pid;
754     SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_INSPECT);
755     return;
756   }
757   Regs regs(pid);
758   auto status = regs.Fetch();
759   if (!status.ok()) {
760     // Ignore if process is killed in the meanwhile
761     if (absl::IsNotFound(status)) {
762       LOG(WARNING) << "failed to fetch regs: " << status;
763       return;
764     }
765     LOG(ERROR) << "failed to fetch regs: " << status;
766     SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_FETCH);
767     return;
768   }
769   int64_t return_value = regs.GetReturnValue(sapi::host_cpu::Architecture());
770   notify_->EventSyscallReturn(index->second, return_value);
771   syscalls_in_progress_.erase(index);
772   ContinueProcess(pid, 0);
773 }
774 
EventPtraceNewProcess(pid_t pid,int event_msg)775 void PtraceMonitor::EventPtraceNewProcess(pid_t pid, int event_msg) {
776   // ptrace doesn't issue syscall-exit-stops for successful fork/vfork/clone
777   // system calls. Check if the monitor wanted to inspect the syscall's return
778   // value, and call EventSyscallReturn for the parent process if so.
779   auto index = syscalls_in_progress_.find(pid);
780   if (index != syscalls_in_progress_.end()) {
781     auto syscall_nr = index->second.nr();
782     bool creating_new_process = syscall_nr == __NR_clone;
783 #ifdef __NR_clone3
784     creating_new_process = creating_new_process || syscall_nr == __NR_clone3;
785 #endif
786 #ifdef __NR_fork
787     creating_new_process = creating_new_process || syscall_nr == __NR_fork;
788 #endif
789 #ifdef __NR_vfork
790     creating_new_process = creating_new_process || syscall_nr == __NR_vfork;
791 #endif
792     if (!creating_new_process) {
793       LOG(ERROR) << "Expected a fork/vfork/clone syscall in progress in PID "
794                  << pid << "; actual: " << index->second.GetDescription();
795       SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_INSPECT);
796       return;
797     }
798     notify_->EventSyscallReturn(index->second, event_msg);
799     syscalls_in_progress_.erase(index);
800   }
801   ContinueProcess(pid, 0);
802 }
803 
EventPtraceExec(pid_t pid,int event_msg)804 void PtraceMonitor::EventPtraceExec(pid_t pid, int event_msg) {
805   if (!IsActivelyMonitoring()) {
806     VLOG(1) << "PTRACE_EVENT_EXEC seen from PID: " << event_msg
807             << ". SANDBOX ENABLED!";
808     SetActivelyMonitoring();
809   } else {
810     // ptrace doesn't issue syscall-exit-stops for successful execve/execveat
811     // system calls. Check if the monitor wanted to inspect the syscall's return
812     // value, and call EventSyscallReturn if so.
813     auto index = syscalls_in_progress_.find(pid);
814     if (index != syscalls_in_progress_.end()) {
815       auto syscall_nr = index->second.nr();
816       if (syscall_nr != __NR_execve && syscall_nr != __NR_execveat) {
817         LOG(ERROR) << "Expected an execve/execveat syscall in progress in PID "
818                    << pid << "; actual: " << index->second.GetDescription();
819         SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_INSPECT);
820         return;
821       }
822       notify_->EventSyscallReturn(index->second, 0);
823       syscalls_in_progress_.erase(index);
824     }
825   }
826   ContinueProcess(pid, 0);
827 }
828 
EventPtraceExit(pid_t pid,int event_msg)829 void PtraceMonitor::EventPtraceExit(pid_t pid, int event_msg) {
830   // Forget about any syscalls in progress for this PID.
831   syscalls_in_progress_.erase(pid);
832 
833   // A regular exit, let it continue (fast-path).
834   if (ABSL_PREDICT_TRUE(WIFEXITED(event_msg) &&
835                         (!policy_->collect_stacktrace_on_exit() ||
836                          pid != process_.main_pid))) {
837     ContinueProcess(pid, 0);
838     return;
839   }
840 
841   const bool is_seccomp =
842       WIFSIGNALED(event_msg) && WTERMSIG(event_msg) == SIGSYS;
843   const bool log_stack_trace =
844       absl::GetFlag(FLAGS_sandbox2_log_all_stack_traces);
845   // Fetch the registers as we'll need them to fill the result in any case
846   auto regs = std::make_unique<Regs>(pid);
847   if (is_seccomp || pid == process_.main_pid || log_stack_trace) {
848     auto status = regs->Fetch();
849     if (!status.ok()) {
850       LOG(ERROR) << "failed to fetch regs: " << status;
851       SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_FETCH);
852       return;
853     }
854   }
855 
856   // Process signaled due to seccomp violation.
857   if (is_seccomp) {
858     VLOG(1) << "PID: " << pid << " violation uncovered via the EXIT_EVENT";
859     ActionProcessSyscallViolation(
860         regs.get(), regs->ToSyscall(Syscall::GetHostArch()), kSyscallViolation);
861     return;
862   }
863 
864   // This can be reached in four cases:
865   // 1) Process was killed from the sandbox.
866   // 2) Process was killed because it hit a timeout.
867   // 3) Regular signal/other exit cause.
868   // 4) Normal exit for which we want to obtain stack trace.
869   if (pid == process_.main_pid) {
870     VLOG(1) << "PID: " << pid << " main special exit";
871     if (network_violation_) {
872       SetExitStatusCode(Result::VIOLATION, Result::VIOLATION_NETWORK);
873       result_.SetNetworkViolation(network_proxy_server_->violation_msg_);
874     } else if (external_kill_) {
875       SetExitStatusCode(Result::EXTERNAL_KILL, 0);
876     } else if (timed_out_) {
877       SetExitStatusCode(Result::TIMEOUT, 0);
878     } else if (WIFEXITED(event_msg)) {
879       SetExitStatusCode(Result::OK, WEXITSTATUS(event_msg));
880     } else {
881       SetExitStatusCode(Result::SIGNALED, WTERMSIG(event_msg));
882     }
883     SetAdditionalResultInfo(std::move(regs));
884   } else if (log_stack_trace) {
885     // In case pid == pid_ the stack trace will be logged anyway. So we need
886     // to do explicit logging only when this is not a main PID.
887     if (StackTraceCollectionPossible()) {
888       if (auto stack_trace = GetAndLogStackTrace(regs.get());
889           !stack_trace.ok()) {
890         LOG(ERROR) << "Failed to get stack trace, PID:" << pid
891                    << " status:" << stack_trace.status();
892       }
893     }
894   }
895   VLOG(1) << "Continuing";
896   ContinueProcess(pid, 0);
897 }
898 
EventPtraceStop(pid_t pid,int stopsig)899 void PtraceMonitor::EventPtraceStop(pid_t pid, int stopsig) {
900   // It's not a real stop signal. For example PTRACE_O_TRACECLONE and similar
901   // flags to ptrace(PTRACE_SEIZE) might generate this event with SIGTRAP.
902   if (stopsig != SIGSTOP && stopsig != SIGTSTP && stopsig != SIGTTIN &&
903       stopsig != SIGTTOU) {
904     ContinueProcess(pid, 0);
905     return;
906   }
907   // It's our PID stop signal. Stop it.
908   VLOG(2) << "PID: " << pid << " stopped due to "
909           << util::GetSignalName(stopsig);
910   StopProcess(pid, 0);
911 }
912 
StateProcessStopped(pid_t pid,int status)913 void PtraceMonitor::StateProcessStopped(pid_t pid, int status) {
914   int stopsig = WSTOPSIG(status);
915   // We use PTRACE_O_TRACESYSGOOD, so we can tell it's a syscall stop without
916   // calling PTRACE_GETSIGINFO by checking the value of the reported signal.
917   bool is_syscall_exit = stopsig == (SIGTRAP | 0x80);
918   if (__WPTRACEEVENT(status) == 0 && !is_syscall_exit) {
919     // Must be a regular signal delivery.
920     VLOG(2) << "PID: " << pid
921             << " received signal: " << util::GetSignalName(stopsig);
922     notify_->EventSignal(pid, stopsig);
923     ContinueProcess(pid, stopsig);
924     return;
925   }
926 
927   unsigned long event_msg;  // NOLINT
928   if (ptrace(PTRACE_GETEVENTMSG, pid, 0, &event_msg) == -1) {
929     if (errno == ESRCH) {
930       // This happens from time to time, the kernel does not guarantee us that
931       // we get the event in time.
932       PLOG(INFO) << "ptrace(PTRACE_GETEVENTMSG, " << pid << ")";
933       return;
934     }
935     PLOG(ERROR) << "ptrace(PTRACE_GETEVENTMSG, " << pid << ")";
936     SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_GETEVENT);
937     return;
938   }
939 
940   if (ABSL_PREDICT_FALSE(pid == process_.main_pid && should_dump_stack_ &&
941                          executor_->libunwind_sbox_for_pid_ == 0 &&
942                          policy_->GetNamespace())) {
943     auto stack_trace = [this,
944                         pid]() -> absl::StatusOr<std::vector<std::string>> {
945       Regs regs(pid);
946       SAPI_RETURN_IF_ERROR(regs.Fetch());
947       return GetStackTrace(&regs);
948     }();
949 
950     if (!stack_trace.ok()) {
951       LOG(WARNING) << "FAILED TO GET SANDBOX STACK : " << stack_trace.status();
952     } else if (SAPI_VLOG_IS_ON(0)) {
953       VLOG(0) << "SANDBOX STACK: PID: " << pid << ", [";
954       for (const auto& frame : *stack_trace) {
955         VLOG(0) << "  " << frame;
956       }
957       VLOG(0) << "]";
958     }
959     should_dump_stack_ = false;
960   }
961 
962 #ifndef PTRACE_EVENT_STOP
963 #define PTRACE_EVENT_STOP 128
964 #endif
965 
966   if (is_syscall_exit) {
967     VLOG(2) << "PID: " << pid << " syscall-exit-stop: " << event_msg;
968     EventSyscallExit(pid);
969     return;
970   }
971 
972   switch (__WPTRACEEVENT(status)) {
973     case PTRACE_EVENT_FORK:
974       VLOG(2) << "PID: " << pid << " PTRACE_EVENT_FORK, PID: " << event_msg;
975       EventPtraceNewProcess(pid, event_msg);
976       break;
977     case PTRACE_EVENT_VFORK:
978       VLOG(2) << "PID: " << pid << " PTRACE_EVENT_VFORK, PID: " << event_msg;
979       EventPtraceNewProcess(pid, event_msg);
980       break;
981     case PTRACE_EVENT_CLONE:
982       VLOG(2) << "PID: " << pid << " PTRACE_EVENT_CLONE, PID: " << event_msg;
983       EventPtraceNewProcess(pid, event_msg);
984       break;
985     case PTRACE_EVENT_VFORK_DONE:
986       ContinueProcess(pid, 0);
987       break;
988     case PTRACE_EVENT_EXEC:
989       VLOG(2) << "PID: " << pid << " PTRACE_EVENT_EXEC, PID: " << event_msg;
990       EventPtraceExec(pid, event_msg);
991       break;
992     case PTRACE_EVENT_EXIT:
993       VLOG(2) << "PID: " << pid << " PTRACE_EVENT_EXIT: " << event_msg;
994       EventPtraceExit(pid, event_msg);
995       break;
996     case PTRACE_EVENT_STOP:
997       VLOG(2) << "PID: " << pid << " PTRACE_EVENT_STOP: " << event_msg;
998       EventPtraceStop(pid, stopsig);
999       break;
1000     case PTRACE_EVENT_SECCOMP:
1001       VLOG(2) << "PID: " << pid << " PTRACE_EVENT_SECCOMP: " << event_msg;
1002       EventPtraceSeccomp(pid, event_msg);
1003       break;
1004     default:
1005       LOG(ERROR) << "Unknown ptrace event: " << __WPTRACEEVENT(status)
1006                  << " with data: " << event_msg;
1007       break;
1008   }
1009 }
1010 
1011 }  // namespace sandbox2
1012