1 /*
2  * Copyright (C) 2024 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "host/commands/process_sandboxer/pidfd.h"
17 
18 #include <dirent.h>
19 #include <fcntl.h>
20 #include <linux/sched.h>
21 #include <signal.h>
22 #include <sys/prctl.h>
23 #include <sys/syscall.h>
24 #include <sys/types.h>
25 #include <unistd.h>
26 
27 #include <fstream>
28 #include <memory>
29 #include <utility>
30 #include <vector>
31 
32 #include <absl/log/check.h>
33 #include <absl/log/log.h>
34 #include <absl/status/status.h>
35 #include <absl/status/statusor.h>
36 #include <absl/strings/numbers.h>
37 #include <absl/strings/str_cat.h>
38 #include <absl/strings/str_format.h>
39 #include <absl/strings/str_join.h>
40 #include <absl/strings/str_split.h>
41 #include <absl/types/span.h>
42 
43 #include "host/commands/process_sandboxer/unique_fd.h"
44 
45 namespace cuttlefish::process_sandboxer {
46 
FromRunningProcess(pid_t pid)47 absl::StatusOr<PidFd> PidFd::FromRunningProcess(pid_t pid) {
48   UniqueFd fd(syscall(__NR_pidfd_open, pid, 0));  // Always CLOEXEC
49   if (fd.Get() < 0) {
50     return absl::ErrnoToStatus(errno, "`pidfd_open` failed");
51   }
52   return PidFd(std::move(fd), pid);
53 }
54 
LaunchSubprocess(absl::Span<const std::string> argv,std::vector<std::pair<UniqueFd,int>> fds,absl::Span<const std::string> env)55 absl::StatusOr<PidFd> PidFd::LaunchSubprocess(
56     absl::Span<const std::string> argv,
57     std::vector<std::pair<UniqueFd, int>> fds,
58     absl::Span<const std::string> env) {
59   int pidfd;
60   clone_args args_for_clone = clone_args{
61       .flags = CLONE_PIDFD,
62       .pidfd = reinterpret_cast<std::uintptr_t>(&pidfd),
63   };
64 
65   pid_t res = syscall(__NR_clone3, &args_for_clone, sizeof(args_for_clone));
66   if (res < 0) {
67     std::string argv_str = absl::StrJoin(argv, "','");
68     std::string error = absl::StrCat("clone3 failed: argv=['", argv_str, "']");
69     return absl::ErrnoToStatus(errno, error);
70   } else if (res > 0) {
71     std::string argv_str = absl::StrJoin(argv, "','");
72     VLOG(1) << res << ": Running w/o sandbox ['" << argv_str << "]";
73 
74     UniqueFd fd(pidfd);
75     return PidFd(std::move(fd), res);
76   }
77 
78   /* Duplicate every input in `fds` into a range higher than the highest output
79    * in `fds`, in case there is any overlap between inputs and outputs. */
80   int minimum_backup_fd = -1;
81   for (const auto& [my_fd, target_fd] : fds) {
82     if (target_fd + 1 > minimum_backup_fd) {
83       minimum_backup_fd = target_fd + 1;
84     }
85   }
86 
87   std::unordered_map<int, int> backup_mapping;
88   for (const auto& [my_fd, target_fd] : fds) {
89     int backup = fcntl(my_fd.Get(), F_DUPFD, minimum_backup_fd);
90     PCHECK(backup >= 0) << "fcntl(..., F_DUPFD) failed";
91     int flags = fcntl(backup, F_GETFD);
92     PCHECK(flags >= 0) << "fcntl(..., F_GETFD failed";
93     flags &= FD_CLOEXEC;
94     PCHECK(fcntl(backup, F_SETFD, flags) >= 0) << "fcntl(..., F_SETFD failed";
95     backup_mapping[backup] = target_fd;
96   }
97 
98   for (const auto& [backup_fd, target_fd] : backup_mapping) {
99     // dup2 always unsets FD_CLOEXEC
100     PCHECK(dup2(backup_fd, target_fd) >= 0) << "dup2 failed";
101   }
102 
103   std::vector<std::string> argv_clone(argv.begin(), argv.end());
104   std::vector<char*> argv_cstr;
105   for (auto& arg : argv_clone) {
106     argv_cstr.emplace_back(arg.data());
107   }
108   argv_cstr.emplace_back(nullptr);
109 
110   std::vector<std::string> env_clone(env.begin(), env.end());
111   std::vector<char*> env_cstr;
112   for (std::string& env_member : env_clone) {
113     env_cstr.emplace_back(env_member.data());
114   }
115   env_cstr.emplace_back(nullptr);
116 
117   if (prctl(PR_SET_PDEATHSIG, SIGHUP) < 0) {  // Die when parent dies
118     PLOG(FATAL) << "prctl failed";
119   }
120 
121   execve(argv_cstr[0], argv_cstr.data(), env_cstr.data());
122 
123   PLOG(FATAL) << "execv failed";
124 }
125 
PidFd(UniqueFd fd,pid_t pid)126 PidFd::PidFd(UniqueFd fd, pid_t pid) : fd_(std::move(fd)), pid_(pid) {}
127 
Get() const128 int PidFd::Get() const { return fd_.Get(); }
129 
AllFds()130 absl::StatusOr<std::vector<std::pair<UniqueFd, int>>> PidFd::AllFds() {
131   std::vector<std::pair<UniqueFd, int>> fds;
132 
133   std::string dir_name = absl::StrFormat("/proc/%d/fd", pid_);
134   std::unique_ptr<DIR, int (*)(DIR*)> dir(opendir(dir_name.c_str()), closedir);
135   if (dir.get() == nullptr) {
136     return absl::ErrnoToStatus(errno, "`opendir` failed");
137   }
138   for (dirent* ent = readdir(dir.get()); ent; ent = readdir(dir.get())) {
139     int other_fd;
140     // `d_name` is guaranteed to be null terminated
141     std::string_view name{ent->d_name};
142     if (name == "." || name == "..") {
143       continue;
144     }
145     if (!absl::SimpleAtoi(name, &other_fd)) {
146       std::string error = absl::StrFormat("'%v/%v' not an int", dir_name, name);
147       return absl::InternalError(error);
148     }
149     // Always CLOEXEC
150     UniqueFd our_fd(syscall(__NR_pidfd_getfd, fd_.Get(), other_fd, 0));
151     if (our_fd.Get() < 0) {
152       return absl::ErrnoToStatus(errno, "`pidfd_getfd` failed");
153     }
154     fds.emplace_back(std::move(our_fd), other_fd);
155   }
156 
157   return fds;
158 }
159 
ReadNullSepFile(const std::string & path)160 static absl::StatusOr<std::vector<std::string>> ReadNullSepFile(
161     const std::string& path) {
162   std::ifstream cmdline_file(path, std::ios::binary);
163   if (!cmdline_file) {
164     auto err = absl::StrFormat("Failed to open '%v'", path);
165     return absl::InternalError(err);
166   }
167   std::stringstream buffer;
168   buffer << cmdline_file.rdbuf();
169   if (!cmdline_file) {
170     auto err = absl::StrFormat("Failed to read '%v'", path);
171     return absl::InternalError(err);
172   }
173 
174   std::vector<std::string> members = absl::StrSplit(buffer.str(), '\0');
175   if (members.empty()) {
176     return absl::InternalError(absl::StrFormat("'%v' is empty", path));
177   } else if (members.back() == "") {
178     members.pop_back();  // may end in a null terminator
179   }
180   return members;
181 }
182 
Argv()183 absl::StatusOr<std::vector<std::string>> PidFd::Argv() {
184   return ReadNullSepFile(absl::StrFormat("/proc/%d/cmdline", pid_));
185 }
186 
Env()187 absl::StatusOr<std::vector<std::string>> PidFd::Env() {
188   return ReadNullSepFile(absl::StrFormat("/proc/%d/environ", pid_));
189 }
190 
HaltHierarchy()191 absl::Status PidFd::HaltHierarchy() {
192   if (absl::Status stop = SendSignal(SIGSTOP); !stop.ok()) {
193     return stop;
194   }
195   if (absl::Status halt_children = HaltChildHierarchy(); !halt_children.ok()) {
196     return halt_children;
197   }
198   return SendSignal(SIGKILL);
199 }
200 
201 /* Assumes the process referred to by `pid` does not spawn any more children or
202  * reap any children while this function is running. */
FindChildPids(pid_t pid)203 static absl::StatusOr<std::vector<pid_t>> FindChildPids(pid_t pid) {
204   std::vector<pid_t> child_pids;
205 
206   std::string task_dir = absl::StrFormat("/proc/%d/task", pid);
207   std::unique_ptr<DIR, int (*)(DIR*)> dir(opendir(task_dir.c_str()), closedir);
208   if (dir.get() == nullptr) {
209     return absl::ErrnoToStatus(errno, "`opendir` failed");
210   }
211 
212   while (dirent* ent = readdir(dir.get())) {
213     // `d_name` is guaranteed to be null terminated
214     std::string_view name = ent->d_name;
215     if (name == "." || name == "..") {
216       continue;
217     }
218     std::string children_file =
219         absl::StrFormat("/proc/%d/task/%s/children", pid, name);
220     std::ifstream children_stream(children_file);
221     if (!children_stream) {
222       std::string err = absl::StrCat("can't read child file: ", children_file);
223       return absl::InternalError(err);
224     }
225 
226     std::string children_str;
227     std::getline(children_stream, children_str);
228     for (std::string_view child_str : absl::StrSplit(children_str, " ")) {
229       if (child_str.empty()) {
230         continue;
231       }
232       pid_t child_pid;
233       if (!absl::SimpleAtoi(child_str, &child_pid)) {
234         std::string error = absl::StrFormat("'%s' is not a pid_t", child_str);
235         return absl::InternalError(error);
236       }
237       child_pids.emplace_back(child_pid);
238     }
239   }
240 
241   return child_pids;
242 }
243 
HaltChildHierarchy()244 absl::Status PidFd::HaltChildHierarchy() {
245   absl::StatusOr<std::vector<pid_t>> children = FindChildPids(pid_);
246   if (!children.ok()) {
247     return children.status();
248   }
249   for (pid_t child : *children) {
250     absl::StatusOr<PidFd> child_pidfd = FromRunningProcess(child);
251     if (!child_pidfd.ok()) {
252       return child_pidfd.status();
253     }
254     // HaltHierarchy will SIGSTOP the child so it cannot spawn more children
255     // or reap its own children while everything is being stopped.
256     if (absl::Status halt = child_pidfd->HaltHierarchy(); !halt.ok()) {
257       return halt;
258     }
259   }
260 
261   return absl::OkStatus();
262 }
263 
SendSignal(int signal)264 absl::Status PidFd::SendSignal(int signal) {
265   if (syscall(__NR_pidfd_send_signal, fd_.Get(), signal, nullptr, 0) < 0) {
266     return absl::ErrnoToStatus(errno, "pidfd_send_signal failed");
267   }
268   return absl::OkStatus();
269 }
270 
271 }  // namespace cuttlefish::process_sandboxer
272