1 // Copyright 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 // Implementation file for the sandbox2::Client class.
16
17 #include "sandboxed_api/sandbox2/client.h"
18
19 #include <fcntl.h>
20 #include <linux/bpf_common.h>
21 #include <linux/filter.h>
22 #include <linux/seccomp.h>
23 #include <sys/prctl.h>
24 #include <syscall.h>
25 #include <unistd.h>
26
27 #include <atomic>
28 #include <cerrno>
29 #include <cinttypes>
30 #include <cstdint>
31 #include <cstdlib>
32 #include <limits>
33 #include <memory>
34 #include <string>
35 #include <thread> // NOLINT(build/c++11)
36 #include <utility>
37 #include <vector>
38
39 #include "absl/base/attributes.h"
40 #include "absl/base/macros.h"
41 #include "absl/container/flat_hash_map.h"
42 #include "absl/status/status.h"
43 #include "absl/strings/numbers.h"
44 #include "absl/strings/str_cat.h"
45 #include "absl/strings/str_join.h"
46 #include "absl/strings/str_split.h"
47 #include "absl/strings/string_view.h"
48 #include "sandboxed_api/sandbox2/comms.h"
49 #include "sandboxed_api/sandbox2/policy.h"
50 #include "sandboxed_api/sandbox2/sanitizer.h"
51 #include "sandboxed_api/sandbox2/syscall.h"
52 #include "sandboxed_api/sandbox2/util/bpf_helper.h"
53 #include "sandboxed_api/util/raw_logging.h"
54
55 #ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER
56 #define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3)
57 #endif
58
59 namespace sandbox2 {
60 namespace {
61
InitSeccompUnotify(sock_fprog prog,Comms * comms)62 void InitSeccompUnotify(sock_fprog prog, Comms* comms) {
63 // The policy might not allow sending the notify FD.
64 // Create a separate thread that won't get the seccomp policy to send the FD.
65 // Synchronize with it using plain atomics + seccomp TSYNC, so we don't need
66 // any additional syscalls.
67 std::atomic<int> fd(-1);
68 std::atomic<int> tid(-1);
69
70 std::thread th([comms, &fd, &tid]() {
71 int notify_fd = -1;
72 while (notify_fd == -1) {
73 notify_fd = fd.load(std::memory_order_seq_cst);
74 }
75 SAPI_RAW_CHECK(comms->SendFD(notify_fd), "sending unotify fd");
76 SAPI_RAW_CHECK(close(notify_fd) == 0, "closing unotify fd");
77 sock_filter filter = ALLOW;
78 struct sock_fprog allow_prog = {
79 .len = 1,
80 .filter = &filter,
81 };
82 int result = syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, 0,
83 reinterpret_cast<uintptr_t>(&allow_prog));
84 SAPI_RAW_PCHECK(result != -1, "setting seccomp filter");
85 tid.store(syscall(__NR_gettid), std::memory_order_seq_cst);
86 });
87 th.detach();
88 int result = syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
89 SECCOMP_FILTER_FLAG_NEW_LISTENER,
90 reinterpret_cast<uintptr_t>(&prog));
91 SAPI_RAW_PCHECK(result != -1, "setting seccomp filter");
92 fd.store(result, std::memory_order_seq_cst);
93 pid_t child = -1;
94 while (child == -1) {
95 child = tid.load(std::memory_order_seq_cst);
96 }
97 // Apply seccomp.
98 struct sock_filter code[] = {
99 LOAD_ARCH,
100 JNE32(sandbox2::Syscall::GetHostAuditArch(), ALLOW),
101 LOAD_SYSCALL_NR,
102 BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, __NR_seccomp, 0, 3),
103 ARG_32(3),
104 BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, internal::kExecveMagic, 0, 1),
105 DENY,
106 ALLOW,
107 };
108 prog.len = ABSL_ARRAYSIZE(code);
109 prog.filter = code;
110 do {
111 result = syscall(
112 __NR_seccomp, SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
113 reinterpret_cast<uintptr_t>(&prog), internal::kExecveMagic);
114 } while (result == child);
115 SAPI_RAW_CHECK(result == 0, "Enabling seccomp filter");
116 }
117
InitSeccompRegular(sock_fprog prog)118 void InitSeccompRegular(sock_fprog prog) {
119 int result =
120 syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
121 reinterpret_cast<uintptr_t>(&prog));
122 SAPI_RAW_PCHECK(result != -1, "setting seccomp filter");
123 SAPI_RAW_PCHECK(result == 0,
124 "synchronizing threads using SECCOMP_FILTER_FLAG_TSYNC flag "
125 "for thread=%d",
126 result);
127 }
128
129 } // namespace
130
Client(Comms * comms)131 Client::Client(Comms* comms) : comms_(comms) {
132 char* fdmap_envvar = getenv(kFDMapEnvVar);
133 if (!fdmap_envvar) {
134 return;
135 }
136 absl::flat_hash_map<absl::string_view, absl::string_view> vars =
137 absl::StrSplit(fdmap_envvar, ',', absl::SkipEmpty());
138 for (const auto& [name, mapped_fd] : vars) {
139 int fd;
140 SAPI_RAW_CHECK(absl::SimpleAtoi(mapped_fd, &fd), "failed to parse fd map");
141 SAPI_RAW_CHECK(fd_map_.emplace(std::string(name), fd).second,
142 "could not insert mapping into fd map (duplicate)");
143 }
144 unsetenv(kFDMapEnvVar);
145 }
146
GetFdMapEnvVar() const147 std::string Client::GetFdMapEnvVar() const {
148 return absl::StrCat(kFDMapEnvVar, "=",
149 absl::StrJoin(fd_map_, ",", absl::PairFormatter(",")));
150 }
151
PrepareEnvironment(int * preserved_fd)152 void Client::PrepareEnvironment(int* preserved_fd) {
153 SetUpIPC(preserved_fd);
154 SetUpCwd();
155 }
156
EnableSandbox()157 void Client::EnableSandbox() {
158 ReceivePolicy();
159 ApplyPolicyAndBecomeTracee();
160 }
161
SandboxMeHere()162 void Client::SandboxMeHere() {
163 PrepareEnvironment();
164 EnableSandbox();
165 }
166
SetUpCwd()167 void Client::SetUpCwd() {
168 {
169 // Get the current working directory to check if we are in a mount
170 // namespace.
171 // Note: glibc 2.27 no longer returns a relative path in that case, but
172 // fails with ENOENT and returns a nullptr instead. The code still
173 // needs to run on lower version for the time being.
174 char cwd_buf[PATH_MAX + 1] = {0};
175 char* cwd = getcwd(cwd_buf, ABSL_ARRAYSIZE(cwd_buf));
176 SAPI_RAW_PCHECK(cwd != nullptr || errno == ENOENT,
177 "no current working directory");
178
179 // Outside of the mount namespace, the path is of the form
180 // '(unreachable)/...'. Only check for the slash, since Linux might make up
181 // other prefixes in the future.
182 if (errno == ENOENT || cwd_buf[0] != '/') {
183 SAPI_RAW_VLOG(1, "chdir into mount namespace, cwd was '%s'", cwd_buf);
184 // If we are in a mount namespace but fail to chdir, then it can lead to a
185 // sandbox escape -- we need to fail with FATAL if the chdir fails.
186 SAPI_RAW_PCHECK(chdir("/") != -1, "corrective chdir");
187 }
188 }
189
190 // Receive the user-supplied current working directory and change into it.
191 std::string cwd;
192 SAPI_RAW_CHECK(comms_->RecvString(&cwd), "receiving working directory");
193 if (!cwd.empty()) {
194 // On the other hand this chdir can fail without a sandbox escape. It will
195 // probably not have the intended behavior though.
196 if (chdir(cwd.c_str()) == -1 && SAPI_RAW_VLOG_IS_ON(1)) {
197 SAPI_RAW_PLOG(
198 INFO,
199 "chdir(%s) failed, falling back to previous cwd or / (with "
200 "namespaces). Use Executor::SetCwd() to set a working directory",
201 cwd.c_str());
202 }
203 }
204 }
205
SetUpIPC(int * preserved_fd)206 void Client::SetUpIPC(int* preserved_fd) {
207 uint32_t num_of_fd_pairs;
208 SAPI_RAW_CHECK(comms_->RecvUint32(&num_of_fd_pairs),
209 "receiving number of fd pairs");
210 SAPI_RAW_CHECK(fd_map_.empty(), "fd map not empty");
211
212 SAPI_RAW_VLOG(1, "Will receive %d file descriptor pairs", num_of_fd_pairs);
213
214 absl::flat_hash_map<int, int*> preserve_fds_map;
215 if (preserved_fd) {
216 preserve_fds_map.emplace(*preserved_fd, preserved_fd);
217 }
218
219 for (uint32_t i = 0; i < num_of_fd_pairs; ++i) {
220 int32_t requested_fd;
221 int32_t fd;
222 std::string name;
223
224 SAPI_RAW_CHECK(comms_->RecvInt32(&requested_fd), "receiving requested fd");
225 SAPI_RAW_CHECK(comms_->RecvFD(&fd), "receiving current fd");
226 SAPI_RAW_CHECK(comms_->RecvString(&name), "receiving name string");
227
228 if (auto it = preserve_fds_map.find(requested_fd);
229 it != preserve_fds_map.end()) {
230 int old_fd = it->first;
231 int new_fd = dup(old_fd);
232 SAPI_RAW_PCHECK(new_fd != -1, "Failed to duplicate preserved fd=%d",
233 old_fd);
234 SAPI_RAW_LOG(INFO, "Moved preserved fd=%d to %d", old_fd, new_fd);
235 close(old_fd);
236 int* pfd = it->second;
237 *pfd = new_fd;
238 preserve_fds_map.erase(it);
239 preserve_fds_map.emplace(new_fd, pfd);
240 }
241
242 if (requested_fd == comms_->GetConnectionFD()) {
243 comms_->MoveToAnotherFd();
244 SAPI_RAW_LOG(INFO,
245 "Trying to map over comms fd (%d). Remapped comms to %d",
246 requested_fd, comms_->GetConnectionFD());
247 }
248
249 if (requested_fd != -1 && fd != requested_fd) {
250 if (requested_fd > STDERR_FILENO && fcntl(requested_fd, F_GETFD) != -1) {
251 // Dup2 will silently close the FD if one is already at requested_fd.
252 // If someone is using the deferred sandbox entry, ie. SandboxMeHere,
253 // the application might have something actually using that fd.
254 // Therefore let's log a big warning if that FD is already in use.
255 // Note: this check doesn't happen for STDIN,STDOUT,STDERR.
256 SAPI_RAW_LOG(
257 WARNING,
258 "Cloning received fd %d over %d which is already open and will "
259 "be silently closed. This may lead to unexpected behavior!",
260 fd, requested_fd);
261 }
262
263 SAPI_RAW_VLOG(1, "Cloning received fd=%d onto fd=%d", fd, requested_fd);
264 SAPI_RAW_PCHECK(dup2(fd, requested_fd) != -1, "");
265
266 // Close the newly received FD if it differs from the new one.
267 close(fd);
268 fd = requested_fd;
269 }
270
271 if (!name.empty()) {
272 SAPI_RAW_CHECK(fd_map_.emplace(name, fd).second, "duplicate fd mapping");
273 }
274 }
275 }
276
ReceivePolicy()277 void Client::ReceivePolicy() {
278 std::vector<uint8_t> bytes;
279 SAPI_RAW_CHECK(comms_->RecvBytes(&bytes), "receive bytes");
280 policy_ = std::move(bytes);
281 }
282
ApplyPolicyAndBecomeTracee()283 void Client::ApplyPolicyAndBecomeTracee() {
284 // When running under *SAN, we need to notify *SANs background thread that we
285 // want it to exit and wait for it to be done. When not running under *SAN,
286 // this function does nothing.
287 sanitizer::WaitForSanitizer();
288
289 // Creds can be received w/o synchronization, once the connection is
290 // established.
291 pid_t cred_pid;
292 uid_t cred_uid ABSL_ATTRIBUTE_UNUSED;
293 gid_t cred_gid ABSL_ATTRIBUTE_UNUSED;
294 SAPI_RAW_CHECK(comms_->RecvCreds(&cred_pid, &cred_uid, &cred_gid),
295 "receiving credentials");
296
297 SAPI_RAW_CHECK(prctl(PR_SET_DUMPABLE, 1) == 0,
298 "setting PR_SET_DUMPABLE flag");
299 if (prctl(PR_SET_PTRACER, cred_pid) == -1) {
300 SAPI_RAW_VLOG(1, "No YAMA on this system. Continuing");
301 }
302
303 SAPI_RAW_CHECK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == 0,
304 "setting PR_SET_NO_NEW_PRIVS flag");
305 SAPI_RAW_CHECK(prctl(PR_SET_KEEPCAPS, 0) == 0,
306 "setting PR_SET_KEEPCAPS flag");
307
308 sock_fprog prog;
309 SAPI_RAW_CHECK(policy_.size() / sizeof(sock_filter) <=
310 std::numeric_limits<uint16_t>::max(),
311 "seccomp policy too long");
312 prog.len = static_cast<uint16_t>(policy_.size() / sizeof(sock_filter));
313 prog.filter = reinterpret_cast<sock_filter*>(&policy_.front());
314
315 SAPI_RAW_VLOG(1,
316 "Applying policy in PID %zd, sock_fprog.len: %" PRId16
317 " entries (%" PRIuPTR " bytes)",
318 syscall(__NR_gettid), prog.len, policy_.size());
319
320 // Signal executor we are ready to have limits applied on us and be ptraced.
321 // We want limits at the last moment to avoid triggering them too early and we
322 // want ptrace at the last moment to avoid synchronization deadlocks.
323 SAPI_RAW_CHECK(comms_->SendUint32(kClient2SandboxReady),
324 "receiving ready signal from executor");
325 uint32_t ret; // wait for confirmation
326 SAPI_RAW_CHECK(comms_->RecvUint32(&ret),
327 "receving confirmation from executor");
328 if (ret == kSandbox2ClientUnotify) {
329 InitSeccompUnotify(prog, comms_);
330 } else {
331 SAPI_RAW_CHECK(ret == kSandbox2ClientDone,
332 "invalid confirmation from executor");
333 InitSeccompRegular(prog);
334 }
335 }
336
GetMappedFD(const std::string & name)337 int Client::GetMappedFD(const std::string& name) {
338 auto it = fd_map_.find(name);
339 SAPI_RAW_CHECK(it != fd_map_.end(),
340 "mapped fd not found (function called twice?)");
341 int fd = it->second;
342 fd_map_.erase(it);
343 return fd;
344 }
345
HasMappedFD(const std::string & name)346 bool Client::HasMappedFD(const std::string& name) {
347 return fd_map_.find(name) != fd_map_.end();
348 }
349
SendLogsToSupervisor()350 void Client::SendLogsToSupervisor() {
351 // This LogSink will register itself and send all logs to the executor until
352 // the object is destroyed.
353 logsink_ = std::make_unique<LogSink>(GetMappedFD(LogSink::kLogFDName));
354 }
355
GetNetworkProxyClient()356 NetworkProxyClient* Client::GetNetworkProxyClient() {
357 if (proxy_client_ == nullptr) {
358 proxy_client_ = std::make_unique<NetworkProxyClient>(
359 GetMappedFD(NetworkProxyClient::kFDName));
360 }
361 return proxy_client_.get();
362 }
363
InstallNetworkProxyHandler()364 absl::Status Client::InstallNetworkProxyHandler() {
365 if (fd_map_.find(NetworkProxyClient::kFDName) == fd_map_.end()) {
366 return absl::FailedPreconditionError(
367 "InstallNetworkProxyHandler() must be called at most once after the "
368 "sandbox is installed. Also, the NetworkProxyServer needs to be "
369 "enabled.");
370 }
371 return NetworkProxyHandler::InstallNetworkProxyHandler(
372 GetNetworkProxyClient());
373 }
374
375 } // namespace sandbox2
376