1 /*
2 * Copyright (C) 2024 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "host/commands/process_sandboxer/pidfd.h"
17
18 #include <dirent.h>
19 #include <fcntl.h>
20 #include <linux/sched.h>
21 #include <signal.h>
22 #include <sys/prctl.h>
23 #include <sys/syscall.h>
24 #include <sys/types.h>
25 #include <unistd.h>
26
27 #include <fstream>
28 #include <memory>
29 #include <utility>
30 #include <vector>
31
32 #include <absl/log/check.h>
33 #include <absl/log/log.h>
34 #include <absl/status/status.h>
35 #include <absl/status/statusor.h>
36 #include <absl/strings/numbers.h>
37 #include <absl/strings/str_cat.h>
38 #include <absl/strings/str_format.h>
39 #include <absl/strings/str_join.h>
40 #include <absl/strings/str_split.h>
41 #include <absl/types/span.h>
42
43 #include "host/commands/process_sandboxer/unique_fd.h"
44
45 namespace cuttlefish::process_sandboxer {
46
FromRunningProcess(pid_t pid)47 absl::StatusOr<PidFd> PidFd::FromRunningProcess(pid_t pid) {
48 UniqueFd fd(syscall(__NR_pidfd_open, pid, 0)); // Always CLOEXEC
49 if (fd.Get() < 0) {
50 return absl::ErrnoToStatus(errno, "`pidfd_open` failed");
51 }
52 return PidFd(std::move(fd), pid);
53 }
54
LaunchSubprocess(absl::Span<const std::string> argv,std::vector<std::pair<UniqueFd,int>> fds,absl::Span<const std::string> env)55 absl::StatusOr<PidFd> PidFd::LaunchSubprocess(
56 absl::Span<const std::string> argv,
57 std::vector<std::pair<UniqueFd, int>> fds,
58 absl::Span<const std::string> env) {
59 int pidfd;
60 clone_args args_for_clone = clone_args{
61 .flags = CLONE_PIDFD,
62 .pidfd = reinterpret_cast<std::uintptr_t>(&pidfd),
63 };
64
65 pid_t res = syscall(__NR_clone3, &args_for_clone, sizeof(args_for_clone));
66 if (res < 0) {
67 std::string argv_str = absl::StrJoin(argv, "','");
68 std::string error = absl::StrCat("clone3 failed: argv=['", argv_str, "']");
69 return absl::ErrnoToStatus(errno, error);
70 } else if (res > 0) {
71 std::string argv_str = absl::StrJoin(argv, "','");
72 VLOG(1) << res << ": Running w/o sandbox ['" << argv_str << "]";
73
74 UniqueFd fd(pidfd);
75 return PidFd(std::move(fd), res);
76 }
77
78 /* Duplicate every input in `fds` into a range higher than the highest output
79 * in `fds`, in case there is any overlap between inputs and outputs. */
80 int minimum_backup_fd = -1;
81 for (const auto& [my_fd, target_fd] : fds) {
82 if (target_fd + 1 > minimum_backup_fd) {
83 minimum_backup_fd = target_fd + 1;
84 }
85 }
86
87 std::unordered_map<int, int> backup_mapping;
88 for (const auto& [my_fd, target_fd] : fds) {
89 int backup = fcntl(my_fd.Get(), F_DUPFD, minimum_backup_fd);
90 PCHECK(backup >= 0) << "fcntl(..., F_DUPFD) failed";
91 int flags = fcntl(backup, F_GETFD);
92 PCHECK(flags >= 0) << "fcntl(..., F_GETFD failed";
93 flags &= FD_CLOEXEC;
94 PCHECK(fcntl(backup, F_SETFD, flags) >= 0) << "fcntl(..., F_SETFD failed";
95 backup_mapping[backup] = target_fd;
96 }
97
98 for (const auto& [backup_fd, target_fd] : backup_mapping) {
99 // dup2 always unsets FD_CLOEXEC
100 PCHECK(dup2(backup_fd, target_fd) >= 0) << "dup2 failed";
101 }
102
103 std::vector<std::string> argv_clone(argv.begin(), argv.end());
104 std::vector<char*> argv_cstr;
105 for (auto& arg : argv_clone) {
106 argv_cstr.emplace_back(arg.data());
107 }
108 argv_cstr.emplace_back(nullptr);
109
110 std::vector<std::string> env_clone(env.begin(), env.end());
111 std::vector<char*> env_cstr;
112 for (std::string& env_member : env_clone) {
113 env_cstr.emplace_back(env_member.data());
114 }
115 env_cstr.emplace_back(nullptr);
116
117 if (prctl(PR_SET_PDEATHSIG, SIGHUP) < 0) { // Die when parent dies
118 PLOG(FATAL) << "prctl failed";
119 }
120
121 execve(argv_cstr[0], argv_cstr.data(), env_cstr.data());
122
123 PLOG(FATAL) << "execv failed";
124 }
125
PidFd(UniqueFd fd,pid_t pid)126 PidFd::PidFd(UniqueFd fd, pid_t pid) : fd_(std::move(fd)), pid_(pid) {}
127
Get() const128 int PidFd::Get() const { return fd_.Get(); }
129
AllFds()130 absl::StatusOr<std::vector<std::pair<UniqueFd, int>>> PidFd::AllFds() {
131 std::vector<std::pair<UniqueFd, int>> fds;
132
133 std::string dir_name = absl::StrFormat("/proc/%d/fd", pid_);
134 std::unique_ptr<DIR, int (*)(DIR*)> dir(opendir(dir_name.c_str()), closedir);
135 if (dir.get() == nullptr) {
136 return absl::ErrnoToStatus(errno, "`opendir` failed");
137 }
138 for (dirent* ent = readdir(dir.get()); ent; ent = readdir(dir.get())) {
139 int other_fd;
140 // `d_name` is guaranteed to be null terminated
141 std::string_view name{ent->d_name};
142 if (name == "." || name == "..") {
143 continue;
144 }
145 if (!absl::SimpleAtoi(name, &other_fd)) {
146 std::string error = absl::StrFormat("'%v/%v' not an int", dir_name, name);
147 return absl::InternalError(error);
148 }
149 // Always CLOEXEC
150 UniqueFd our_fd(syscall(__NR_pidfd_getfd, fd_.Get(), other_fd, 0));
151 if (our_fd.Get() < 0) {
152 return absl::ErrnoToStatus(errno, "`pidfd_getfd` failed");
153 }
154 fds.emplace_back(std::move(our_fd), other_fd);
155 }
156
157 return fds;
158 }
159
ReadNullSepFile(const std::string & path)160 static absl::StatusOr<std::vector<std::string>> ReadNullSepFile(
161 const std::string& path) {
162 std::ifstream cmdline_file(path, std::ios::binary);
163 if (!cmdline_file) {
164 auto err = absl::StrFormat("Failed to open '%v'", path);
165 return absl::InternalError(err);
166 }
167 std::stringstream buffer;
168 buffer << cmdline_file.rdbuf();
169 if (!cmdline_file) {
170 auto err = absl::StrFormat("Failed to read '%v'", path);
171 return absl::InternalError(err);
172 }
173
174 std::vector<std::string> members = absl::StrSplit(buffer.str(), '\0');
175 if (members.empty()) {
176 return absl::InternalError(absl::StrFormat("'%v' is empty", path));
177 } else if (members.back() == "") {
178 members.pop_back(); // may end in a null terminator
179 }
180 return members;
181 }
182
Argv()183 absl::StatusOr<std::vector<std::string>> PidFd::Argv() {
184 return ReadNullSepFile(absl::StrFormat("/proc/%d/cmdline", pid_));
185 }
186
Env()187 absl::StatusOr<std::vector<std::string>> PidFd::Env() {
188 return ReadNullSepFile(absl::StrFormat("/proc/%d/environ", pid_));
189 }
190
HaltHierarchy()191 absl::Status PidFd::HaltHierarchy() {
192 if (absl::Status stop = SendSignal(SIGSTOP); !stop.ok()) {
193 return stop;
194 }
195 if (absl::Status halt_children = HaltChildHierarchy(); !halt_children.ok()) {
196 return halt_children;
197 }
198 return SendSignal(SIGKILL);
199 }
200
201 /* Assumes the process referred to by `pid` does not spawn any more children or
202 * reap any children while this function is running. */
FindChildPids(pid_t pid)203 static absl::StatusOr<std::vector<pid_t>> FindChildPids(pid_t pid) {
204 std::vector<pid_t> child_pids;
205
206 std::string task_dir = absl::StrFormat("/proc/%d/task", pid);
207 std::unique_ptr<DIR, int (*)(DIR*)> dir(opendir(task_dir.c_str()), closedir);
208 if (dir.get() == nullptr) {
209 return absl::ErrnoToStatus(errno, "`opendir` failed");
210 }
211
212 while (dirent* ent = readdir(dir.get())) {
213 // `d_name` is guaranteed to be null terminated
214 std::string_view name = ent->d_name;
215 if (name == "." || name == "..") {
216 continue;
217 }
218 std::string children_file =
219 absl::StrFormat("/proc/%d/task/%s/children", pid, name);
220 std::ifstream children_stream(children_file);
221 if (!children_stream) {
222 std::string err = absl::StrCat("can't read child file: ", children_file);
223 return absl::InternalError(err);
224 }
225
226 std::string children_str;
227 std::getline(children_stream, children_str);
228 for (std::string_view child_str : absl::StrSplit(children_str, " ")) {
229 if (child_str.empty()) {
230 continue;
231 }
232 pid_t child_pid;
233 if (!absl::SimpleAtoi(child_str, &child_pid)) {
234 std::string error = absl::StrFormat("'%s' is not a pid_t", child_str);
235 return absl::InternalError(error);
236 }
237 child_pids.emplace_back(child_pid);
238 }
239 }
240
241 return child_pids;
242 }
243
HaltChildHierarchy()244 absl::Status PidFd::HaltChildHierarchy() {
245 absl::StatusOr<std::vector<pid_t>> children = FindChildPids(pid_);
246 if (!children.ok()) {
247 return children.status();
248 }
249 for (pid_t child : *children) {
250 absl::StatusOr<PidFd> child_pidfd = FromRunningProcess(child);
251 if (!child_pidfd.ok()) {
252 return child_pidfd.status();
253 }
254 // HaltHierarchy will SIGSTOP the child so it cannot spawn more children
255 // or reap its own children while everything is being stopped.
256 if (absl::Status halt = child_pidfd->HaltHierarchy(); !halt.ok()) {
257 return halt;
258 }
259 }
260
261 return absl::OkStatus();
262 }
263
SendSignal(int signal)264 absl::Status PidFd::SendSignal(int signal) {
265 if (syscall(__NR_pidfd_send_signal, fd_.Get(), signal, nullptr, 0) < 0) {
266 return absl::ErrnoToStatus(errno, "pidfd_send_signal failed");
267 }
268 return absl::OkStatus();
269 }
270
271 } // namespace cuttlefish::process_sandboxer
272