/* Authors: Gregory P. Smith & Jeffrey Yasskin */
#ifndef Py_BUILD_CORE_BUILTIN
# define Py_BUILD_CORE_MODULE 1
#endif

#include "Python.h"
#include "pycore_fileutils.h"
#if defined(HAVE_PIPE2) && !defined(_GNU_SOURCE)
# define _GNU_SOURCE
#endif
#include <unistd.h>
#include <fcntl.h>
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#if defined(HAVE_SYS_STAT_H)
#include <sys/stat.h>
#endif
#ifdef HAVE_SYS_SYSCALL_H
#include <sys/syscall.h>
#endif
#if defined(HAVE_SYS_RESOURCE_H)
#include <sys/resource.h>
#endif
#ifdef HAVE_DIRENT_H
#include <dirent.h>
#endif
#ifdef HAVE_GRP_H
#include <grp.h>
#endif /* HAVE_GRP_H */

#include "posixmodule.h"

#ifdef _Py_MEMORY_SANITIZER
# include <sanitizer/msan_interface.h>
#endif

#if defined(__ANDROID__) && __ANDROID_API__ < 21 && !defined(SYS_getdents64)
# include <sys/linux-syscalls.h>
# define SYS_getdents64 __NR_getdents64
#endif

#if defined(__linux__) && defined(HAVE_VFORK) && defined(HAVE_SIGNAL_H) && \
    defined(HAVE_PTHREAD_SIGMASK) && !defined(HAVE_BROKEN_PTHREAD_SIGMASK)
/* If this is ever expanded to non-Linux platforms, verify what calls are
 * allowed after vfork(). Ex: setsid() may be disallowed on macOS? */
# include <signal.h>
# define VFORK_USABLE 1
#endif

#if defined(__sun) && defined(__SVR4)
/* readdir64 is used to work around Solaris 9 bug 6395699. */
# define readdir readdir64
# define dirent dirent64
# if !defined(HAVE_DIRFD)
/* Some versions of Solaris lack dirfd(). */
#  define dirfd(dirp) ((dirp)->dd_fd)
#  define HAVE_DIRFD
# endif
#endif

#if defined(__FreeBSD__) || (defined(__APPLE__) && defined(__MACH__)) || defined(__DragonFly__)
# define FD_DIR "/dev/fd"
#else
# define FD_DIR "/proc/self/fd"
#endif

#ifdef NGROUPS_MAX
#define MAX_GROUPS NGROUPS_MAX
#else
#define MAX_GROUPS 64
#endif

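/* POSIX_CALL() wraps a system call that reports failure by returning -1:
 * on failure it jumps to the enclosing function's local `error` label,
 * leaving errno set for the error-reporting code there. */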
#define POSIX_CALL(call) do { if ((call) == -1) goto error; } while (0)

static struct PyModuleDef _posixsubprocessmodule;

/* Convert ASCII to a positive int, no libc call. no overflow. -1 on error. */
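/* For example: "17" -> 17, "17x" -> -1, "" -> 0. */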
static int
_pos_int_from_ascii(const char *name)
{
    int num = 0;
    while (*name >= '0' && *name <= '9') {
        num = num * 10 + (*name - '0');
        ++name;
    }
    if (*name)
        return -1;  /* Non digit found, not a number. */
    return num;
}


#if defined(__FreeBSD__) || defined(__DragonFly__)
/* When /dev/fd isn't mounted it is often a static directory populated
 * with entries for 0, 1, 2 or for 0 .. 63 on FreeBSD, NetBSD, OpenBSD and
 * DragonFlyBSD.  NetBSD and OpenBSD have a /proc fs available (though not
 * necessarily mounted) and do not have fdescfs for /dev/fd.  MacOS X has a
 * devfs that properly supports /dev/fd.
 */
static int
_is_fdescfs_mounted_on_dev_fd(void)
{
    struct stat dev_stat;
    struct stat dev_fd_stat;
    if (stat("/dev", &dev_stat) != 0)
        return 0;
    if (stat(FD_DIR, &dev_fd_stat) != 0)
        return 0;
    if (dev_stat.st_dev == dev_fd_stat.st_dev)
        return 0;  /* / == /dev == /dev/fd means it is static. #fail */
    return 1;
}
#endif


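/* The fd-closing helpers below assume py_fds_to_keep is a tuple of
 * non-negative ints sorted in ascending order; verify that here in the
 * parent, where reporting an error is still possible. */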
/* Returns 1 if there is a problem with fd_sequence, 0 otherwise. */
static int
_sanity_check_python_fd_sequence(PyObject *fd_sequence)
{
    Py_ssize_t seq_idx;
    long prev_fd = -1;
    for (seq_idx = 0; seq_idx < PyTuple_GET_SIZE(fd_sequence); ++seq_idx) {
        PyObject* py_fd = PyTuple_GET_ITEM(fd_sequence, seq_idx);
        long iter_fd;
        if (!PyLong_Check(py_fd)) {
            return 1;
        }
        iter_fd = PyLong_AsLong(py_fd);
        if (iter_fd < 0 || iter_fd <= prev_fd || iter_fd > INT_MAX) {
            /* Negative, overflow, unsorted, too big for a fd. */
            return 1;
        }
        prev_fd = iter_fd;
    }
    return 0;
}


/* Is fd found in the sorted fd array? */
static int
_is_fd_in_sorted_fd_sequence(int fd, int *fd_sequence,
                             Py_ssize_t fd_sequence_len)
{
    /* Binary search. */
    Py_ssize_t search_min = 0;
    Py_ssize_t search_max = fd_sequence_len - 1;
    if (search_max < 0)
        return 0;
    do {
        long middle = (search_min + search_max) / 2;
        long middle_fd = fd_sequence[middle];
        if (fd == middle_fd)
            return 1;
        if (fd > middle_fd)
            search_min = middle + 1;
        else
            search_max = middle - 1;
    } while (search_min <= search_max);
    return 0;
}

/*
 * Do all the Python C API calls in the parent process to turn the pass_fds
 * "py_fds_to_keep" tuple into a C array.  The caller owns allocation and
 * freeing of the array.
 *
 * On error an unknown number of array elements may have been filled in.
 * A Python exception has been set when an error is returned.
 *
 * Returns: -1 on error, 0 on success.
 */
static int
convert_fds_to_keep_to_c(PyObject *py_fds_to_keep, int *c_fds_to_keep)
{
    Py_ssize_t i, len;

    len = PyTuple_GET_SIZE(py_fds_to_keep);
    for (i = 0; i < len; ++i) {
        PyObject* fdobj = PyTuple_GET_ITEM(py_fds_to_keep, i);
        long fd = PyLong_AsLong(fdobj);
        if (fd == -1 && PyErr_Occurred()) {
            return -1;
        }
        if (fd < 0 || fd > INT_MAX) {
            PyErr_SetString(PyExc_ValueError,
                            "fd out of range in fds_to_keep.");
            return -1;
        }
        c_fds_to_keep[i] = (int)fd;
    }
    return 0;
}


/* This function must be async-signal-safe as it is called from child_exec()
 * after fork() or vfork().
 */
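/* Mark every fd in c_fds_to_keep as inheritable so it survives exec();
 * errpipe_write is skipped because it must remain close-on-exec so the
 * parent sees EOF on the error pipe once exec() succeeds. */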
static int
make_inheritable(int *c_fds_to_keep, Py_ssize_t len, int errpipe_write)
{
    Py_ssize_t i;

    for (i = 0; i < len; ++i) {
        int fd = c_fds_to_keep[i];
        if (fd == errpipe_write) {
            /* errpipe_write is part of fds_to_keep. It must be closed at
               exec(), but kept open in the child process until exec() is
               called. */
            continue;
        }
        if (_Py_set_inheritable_async_safe(fd, 1, NULL) < 0)
            return -1;
    }
    return 0;
}


/* Get the maximum file descriptor that could be opened by this process.
 * This function is async signal safe for use between fork() and exec().
 */
static long
safe_get_max_fd(void)
{
    long local_max_fd;
#if defined(__NetBSD__)
    local_max_fd = fcntl(0, F_MAXFD);
    if (local_max_fd >= 0)
        return local_max_fd;
#endif
#if defined(HAVE_SYS_RESOURCE_H) && defined(__OpenBSD__)
    struct rlimit rl;
    /* Not on the POSIX async signal safe functions list but likely
     * safe.  TODO - Someone should audit OpenBSD to make sure. */
    if (getrlimit(RLIMIT_NOFILE, &rl) >= 0)
        return (long) rl.rlim_max;
#endif
#ifdef _SC_OPEN_MAX
    local_max_fd = sysconf(_SC_OPEN_MAX);
    if (local_max_fd == -1)
#endif
        local_max_fd = 256;  /* Matches legacy Lib/subprocess.py behavior. */
    return local_max_fd;
}


/* Close all file descriptors in the given range except for those in
 * fds_to_keep by invoking closer on each subrange.
 *
 * If end_fd == -1, it's guessed via safe_get_max_fd(), but it isn't
 * possible to know for sure what the max fd to go up to is for
 * processes with the capability of raising their maximum, or in case
 * a process opened a high fd and then lowered its maximum.
 */
static int
_close_range_except(int start_fd,
                    int end_fd,
                    int *fds_to_keep,
                    Py_ssize_t fds_to_keep_len,
                    int (*closer)(int, int))
{
    if (end_fd == -1) {
        end_fd = Py_MIN(safe_get_max_fd(), INT_MAX);
    }
    Py_ssize_t keep_seq_idx;
    /* As fds_to_keep is sorted we can loop through the list closing
     * fds in between any in the keep list falling within our range. */
    for (keep_seq_idx = 0; keep_seq_idx < fds_to_keep_len; ++keep_seq_idx) {
        int keep_fd = fds_to_keep[keep_seq_idx];
        if (keep_fd < start_fd)
            continue;
        if (closer(start_fd, keep_fd - 1) != 0)
            return -1;
        start_fd = keep_fd + 1;
    }
    if (start_fd <= end_fd) {
        if (closer(start_fd, end_fd) != 0)
            return -1;
    }
    return 0;
}

#if defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)
/* It doesn't matter if d_name has room for NAME_MAX chars; we're using this
 * only to read a directory of short file descriptor number names.  The kernel
 * will return an error if we didn't give it enough space.  Highly Unlikely.
 * This structure is very old and stable: It will not change unless the kernel
 * chooses to break compatibility with all existing binaries.  Highly Unlikely.
 */
struct linux_dirent64 {
    unsigned long long d_ino;
    long long          d_off;
    unsigned short     d_reclen;     /* Length of this linux_dirent */
    unsigned char      d_type;
    char               d_name[256];  /* Filename (null-terminated) */
};

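/* closer() callback for _close_range_except(): closes each fd in
 * [first, last] individually with close(), ignoring errors.  Used when the
 * list of open fds cannot be read from FD_DIR. */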
static int
_brute_force_closer(int first, int last)
{
    for (int i = first; i <= last; i++) {
        /* Ignore errors */
        (void)close(i);
    }
    return 0;
}

/* Close all open file descriptors in the range from start_fd and higher.
 * Do not close any in the sorted fds_to_keep list.
 *
 * This version is async signal safe as it does not make any unsafe C library
 * calls, malloc calls or handle any locks.  It is _unfortunate_ to be forced
 * to resort to making a kernel system call directly but this is the ONLY api
 * available that does no harm.  opendir/readdir/closedir perform memory
 * allocation and locking so while they usually work they are not guaranteed
 * to (especially if you have replaced your malloc implementation).  A version
 * of this function that uses those can be found in the _maybe_unsafe variant.
 *
 * This is Linux specific because that is all I am ready to test it on.  It
 * should be easy to add OS specific dirent or dirent64 structures and modify
 * it with some cpp #define magic to work on other OSes as well if you want.
 */
static void
_close_open_fds_safe(int start_fd, int *fds_to_keep, Py_ssize_t fds_to_keep_len)
{
    int fd_dir_fd;

    fd_dir_fd = _Py_open_noraise(FD_DIR, O_RDONLY);
    if (fd_dir_fd == -1) {
        /* No way to get a list of open fds. */
        _close_range_except(start_fd, -1,
                            fds_to_keep, fds_to_keep_len,
                            _brute_force_closer);
        return;
    } else {
        char buffer[sizeof(struct linux_dirent64)];
        int bytes;
        while ((bytes = syscall(SYS_getdents64, fd_dir_fd,
                                (struct linux_dirent64 *)buffer,
                                sizeof(buffer))) > 0) {
            struct linux_dirent64 *entry;
            int offset;
#ifdef _Py_MEMORY_SANITIZER
            __msan_unpoison(buffer, bytes);
#endif
            for (offset = 0; offset < bytes; offset += entry->d_reclen) {
                int fd;
                entry = (struct linux_dirent64 *)(buffer + offset);
                if ((fd = _pos_int_from_ascii(entry->d_name)) < 0)
                    continue;  /* Not a number. */
                if (fd != fd_dir_fd && fd >= start_fd &&
                    !_is_fd_in_sorted_fd_sequence(fd, fds_to_keep,
                                                  fds_to_keep_len)) {
                    close(fd);
                }
            }
        }
        close(fd_dir_fd);
    }
}

#define _close_open_fds_fallback _close_open_fds_safe

#else /* NOT (defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)) */

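/* closer() callback for _close_range_except() on the non-Linux path:
 * delegates to _Py_closerange(), which closes every fd in [first, last]. */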
static int
_unsafe_closer(int first, int last)
{
    _Py_closerange(first, last);
    return 0;
}

/* Close all open file descriptors from start_fd and higher.
 * Do not close any in the sorted fds_to_keep tuple.
 *
 * This function violates the strict use of async signal safe functions. :(
 * It calls opendir(), readdir() and closedir().  Of these, the one most
 * likely to ever cause a problem is opendir() as it performs an internal
 * malloc().  Practically this should not be a problem.  The Java VM makes the
 * same calls between fork and exec in its own UNIXProcess_md.c implementation.
 *
 * readdir_r() is not used because it provides no benefit.  It is typically
 * implemented as readdir() followed by memcpy().  See also:
 *   http://womble.decadent.org.uk/readdir_r-advisory.html
 */
static void
_close_open_fds_maybe_unsafe(int start_fd, int *fds_to_keep,
                             Py_ssize_t fds_to_keep_len)
{
    DIR *proc_fd_dir;
#ifndef HAVE_DIRFD
    while (_is_fd_in_sorted_fd_sequence(start_fd, fds_to_keep,
                                        fds_to_keep_len)) {
        ++start_fd;
    }
    /* Close our lowest fd before we call opendir so that it is likely to
     * reuse that fd otherwise we might close opendir's file descriptor in
     * our loop.  This trick assumes that fd's are allocated on a lowest
     * available basis. */
    close(start_fd);
    ++start_fd;
#endif

#if defined(__FreeBSD__) || defined(__DragonFly__)
    if (!_is_fdescfs_mounted_on_dev_fd())
        proc_fd_dir = NULL;
    else
#endif
        proc_fd_dir = opendir(FD_DIR);
    if (!proc_fd_dir) {
        /* No way to get a list of open fds. */
        _close_range_except(start_fd, -1, fds_to_keep, fds_to_keep_len,
                            _unsafe_closer);
    } else {
        struct dirent *dir_entry;
#ifdef HAVE_DIRFD
        int fd_used_by_opendir = dirfd(proc_fd_dir);
#else
        int fd_used_by_opendir = start_fd - 1;
#endif
        errno = 0;
        while ((dir_entry = readdir(proc_fd_dir))) {
            int fd;
            if ((fd = _pos_int_from_ascii(dir_entry->d_name)) < 0)
                continue;  /* Not a number. */
            if (fd != fd_used_by_opendir && fd >= start_fd &&
                !_is_fd_in_sorted_fd_sequence(fd, fds_to_keep,
                                              fds_to_keep_len)) {
                close(fd);
            }
            errno = 0;
        }
        if (errno) {
            /* readdir error, revert behavior. Highly Unlikely. */
            _close_range_except(start_fd, -1, fds_to_keep, fds_to_keep_len,
                                _unsafe_closer);
        }
        closedir(proc_fd_dir);
    }
}

#define _close_open_fds_fallback _close_open_fds_maybe_unsafe

#endif /* else NOT (defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)) */

/* We can use the close_range() library function only if it's known to be
 * async-signal-safe.
 *
 * On Linux, glibc explicitly documents it to be a thin wrapper over
 * the system call, and other C libraries are likely to follow glibc.
 */
#if defined(HAVE_CLOSE_RANGE) && \
    (defined(__linux__) || defined(__FreeBSD__))
#define HAVE_ASYNC_SAFE_CLOSE_RANGE

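/* closer() callback wrapping close_range(2); a flags value of 0 simply
 * closes every fd in [first, last]. */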
static int
_close_range_closer(int first, int last)
{
    return close_range(first, last, 0);
}
#endif

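/* Close every open fd >= start_fd that is not listed in fds_to_keep.
 * Prefer an async-signal-safe close_range() sweep when available; if that
 * fails (e.g. the kernel lacks the syscall), fall back to the platform
 * specific implementation selected above. */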
static void
_close_open_fds(int start_fd, int *fds_to_keep, Py_ssize_t fds_to_keep_len)
{
#ifdef HAVE_ASYNC_SAFE_CLOSE_RANGE
    if (_close_range_except(
            start_fd, INT_MAX, fds_to_keep, fds_to_keep_len,
            _close_range_closer) == 0) {
        return;
    }
#endif
    _close_open_fds_fallback(start_fd, fds_to_keep, fds_to_keep_len);
}

#ifdef VFORK_USABLE
/* Reset dispositions for all signals to SIG_DFL except for ignored
 * signals.  This way we ensure that no signal handlers can run
 * after we unblock signals in a child created by vfork().
 */
static void
reset_signal_handlers(const sigset_t *child_sigmask)
{
    struct sigaction sa_dfl = {.sa_handler = SIG_DFL};
    for (int sig = 1; sig < _NSIG; sig++) {
        /* Dispositions for SIGKILL and SIGSTOP can't be changed. */
        if (sig == SIGKILL || sig == SIGSTOP) {
            continue;
        }

        /* There is no need to reset the disposition of signals that will
         * remain blocked across execve() since the kernel will do it. */
        if (sigismember(child_sigmask, sig) == 1) {
            continue;
        }

        struct sigaction sa;
        /* C libraries usually return EINVAL for signals used
         * internally (e.g. for thread cancellation), so simply
         * skip errors here. */
        if (sigaction(sig, NULL, &sa) == -1) {
            continue;
        }

        /* void *h works as these fields are both pointer types already. */
        void *h = (sa.sa_flags & SA_SIGINFO ? (void *)sa.sa_sigaction :
                                              (void *)sa.sa_handler);
        if (h == SIG_IGN || h == SIG_DFL) {
            continue;
        }

        /* This call can't reasonably fail, but if it does, terminating
         * the child seems to be too harsh, so ignore errors. */
        (void) sigaction(sig, &sa_dfl, NULL);
    }
}
#endif /* VFORK_USABLE */


/*
 * This function is code executed in the child process immediately after
 * (v)fork to set things up and call exec().
 *
 * All of the code in this function must only use async-signal-safe functions,
 * listed at `man 7 signal` or
 * http://www.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html.
 *
 * This restriction is documented at
 * http://www.opengroup.org/onlinepubs/009695399/functions/fork.html.
 *
 * If this function is called after vfork(), even more care must be taken.
 * The lack of preparations that C libraries normally take on fork(),
 * as well as sharing the address space with the parent, might make even
 * async-signal-safe functions vfork-unsafe.  In particular, on Linux,
 * set*id() and setgroups() library functions must not be called, since
 * they have to interact with the library-level thread list and send
 * library-internal signals to implement per-process credentials semantics
 * required by POSIX but not supported natively on Linux.  Another reason to
 * avoid this family of functions is that sharing an address space between
 * processes running with different privileges is inherently insecure.
 * See bpo-35823 for further discussion and references.
 *
 * In some C libraries, setrlimit() has the same thread list/signalling
 * behavior since resource limits were per-thread attributes before
 * Linux 2.6.10.  Musl, as of 1.2.1, is known to have this issue
 * (https://www.openwall.com/lists/musl/2020/10/15/6).
 *
 * If vfork-unsafe functionality is desired after vfork(), consider using
 * syscall() to obtain it.
 */
Py_NO_INLINE static void
child_exec(char *const exec_array[],
           char *const argv[],
           char *const envp[],
           const char *cwd,
           int p2cread, int p2cwrite,
           int c2pread, int c2pwrite,
           int errread, int errwrite,
           int errpipe_read, int errpipe_write,
           int close_fds, int restore_signals,
           int call_setsid, pid_t pgid_to_set,
           int call_setgid, gid_t gid,
           int call_setgroups, size_t groups_size, const gid_t *groups,
           int call_setuid, uid_t uid, int child_umask,
           const void *child_sigmask,
           int *fds_to_keep, Py_ssize_t fds_to_keep_len,
           PyObject *preexec_fn,
           PyObject *preexec_fn_args_tuple)
{
    int i, saved_errno, reached_preexec = 0;
    PyObject *result;
    const char* err_msg = "";
    /* Buffer large enough to hold a hex integer.  We can't malloc. */
    char hex_errno[sizeof(saved_errno)*2+1];

    if (make_inheritable(fds_to_keep, fds_to_keep_len, errpipe_write) < 0)
        goto error;

    /* Close parent's pipe ends. */
    if (p2cwrite != -1)
        POSIX_CALL(close(p2cwrite));
    if (c2pread != -1)
        POSIX_CALL(close(c2pread));
    if (errread != -1)
        POSIX_CALL(close(errread));
    POSIX_CALL(close(errpipe_read));

    /* When duping fds, if there arises a situation where one of the fds is
       either 0, 1 or 2, it is possible that it is overwritten (#12607). */
    if (c2pwrite == 0) {
        POSIX_CALL(c2pwrite = dup(c2pwrite));
        /* issue32270 */
        if (_Py_set_inheritable_async_safe(c2pwrite, 0, NULL) < 0) {
            goto error;
        }
    }
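    /* errwrite is dup2()'d onto fd 2 further below, so it must first be
     * moved off fds 0 and 1, which are about to be overwritten with the
     * child's stdin/stdout; a single dup() can land on another low fd,
     * hence the loop. */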
    while (errwrite == 0 || errwrite == 1) {
        POSIX_CALL(errwrite = dup(errwrite));
        /* issue32270 */
        if (_Py_set_inheritable_async_safe(errwrite, 0, NULL) < 0) {
            goto error;
        }
    }

    /* Dup fds for child.
       dup2() removes the CLOEXEC flag but we must do it ourselves if dup2()
       would be a no-op (issue #10806). */
    if (p2cread == 0) {
        if (_Py_set_inheritable_async_safe(p2cread, 1, NULL) < 0)
            goto error;
    }
    else if (p2cread != -1)
        POSIX_CALL(dup2(p2cread, 0));  /* stdin */

    if (c2pwrite == 1) {
        if (_Py_set_inheritable_async_safe(c2pwrite, 1, NULL) < 0)
            goto error;
    }
    else if (c2pwrite != -1)
        POSIX_CALL(dup2(c2pwrite, 1));  /* stdout */

    if (errwrite == 2) {
        if (_Py_set_inheritable_async_safe(errwrite, 1, NULL) < 0)
            goto error;
    }
    else if (errwrite != -1)
        POSIX_CALL(dup2(errwrite, 2));  /* stderr */

    /* We no longer manually close p2cread, c2pwrite, and errwrite here as
     * _close_open_fds takes care when it is not already non-inheritable. */

    if (cwd)
        POSIX_CALL(chdir(cwd));

    if (child_umask >= 0)
        umask(child_umask);  /* umask() always succeeds. */

    if (restore_signals)
        _Py_RestoreSignals();

#ifdef VFORK_USABLE
    if (child_sigmask) {
        reset_signal_handlers(child_sigmask);
        if ((errno = pthread_sigmask(SIG_SETMASK, child_sigmask, NULL))) {
            goto error;
        }
    }
#endif

#ifdef HAVE_SETSID
    if (call_setsid)
        POSIX_CALL(setsid());
#endif

#ifdef HAVE_SETPGID
    if (pgid_to_set >= 0)
        POSIX_CALL(setpgid(0, pgid_to_set));
#endif

#ifdef HAVE_SETGROUPS
    if (call_setgroups)
        POSIX_CALL(setgroups(groups_size, groups));
#endif /* HAVE_SETGROUPS */

#ifdef HAVE_SETREGID
    if (call_setgid)
        POSIX_CALL(setregid(gid, gid));
#endif /* HAVE_SETREGID */

#ifdef HAVE_SETREUID
    if (call_setuid)
        POSIX_CALL(setreuid(uid, uid));
#endif /* HAVE_SETREUID */


    reached_preexec = 1;
    if (preexec_fn != Py_None && preexec_fn_args_tuple) {
        /* This is where the user has asked us to deadlock their program. */
        result = PyObject_Call(preexec_fn, preexec_fn_args_tuple, NULL);
        if (result == NULL) {
            /* Stringifying the exception or traceback would involve
             * memory allocation and thus potential for deadlock.
             * We've already faced potential deadlock by calling back
             * into Python in the first place, so it probably doesn't
             * matter but we avoid it to minimize the possibility. */
            err_msg = "Exception occurred in preexec_fn.";
            errno = 0;  /* We don't want to report an OSError. */
            goto error;
        }
        /* Py_DECREF(result); - We're about to exec so why bother? */
    }

    /* close FDs after executing preexec_fn, which might open FDs */
    if (close_fds) {
        /* TODO HP-UX could use pstat_getproc() if anyone cares about it. */
        _close_open_fds(3, fds_to_keep, fds_to_keep_len);
    }

    /* This loop matches the Lib/os.py _execvpe()'s PATH search when */
    /* given the executable_list generated by Lib/subprocess.py.     */
    saved_errno = 0;
    for (i = 0; exec_array[i] != NULL; ++i) {
        const char *executable = exec_array[i];
        if (envp) {
            execve(executable, argv, envp);
        } else {
            execv(executable, argv);
        }
        if (errno != ENOENT && errno != ENOTDIR && saved_errno == 0) {
            saved_errno = errno;
        }
    }
    /* Report the first exec error, not the last. */
    if (saved_errno)
        errno = saved_errno;

error:
    saved_errno = errno;
    /* Report the posix error to our parent process. */
    /* We ignore all write() return values as the total size of our writes is
       less than PIPE_BUF and we cannot do anything about an error anyways.
       Use _Py_write_noraise() to retry write() if it is interrupted by a
       signal (fails with EINTR). */
    if (saved_errno) {
        char *cur;
        _Py_write_noraise(errpipe_write, "OSError:", 8);
        cur = hex_errno + sizeof(hex_errno);
        while (saved_errno != 0 && cur != hex_errno) {
            *--cur = Py_hexdigits[saved_errno % 16];
            saved_errno /= 16;
        }
        _Py_write_noraise(errpipe_write, cur, hex_errno + sizeof(hex_errno) - cur);
        _Py_write_noraise(errpipe_write, ":", 1);
        if (!reached_preexec) {
            /* Indicate to the parent that the error happened before exec(). */
            _Py_write_noraise(errpipe_write, "noexec", 6);
        }
        /* We can't call strerror(saved_errno).  It is not async signal safe.
         * The parent process will look the error message up. */
    } else {
        _Py_write_noraise(errpipe_write, "SubprocessError:0:", 18);
        _Py_write_noraise(errpipe_write, err_msg, strlen(err_msg));
    }
}


/* The main purpose of this wrapper function is to isolate vfork() from both
 * subprocess_fork_exec() and child_exec().  A child process created via
 * vfork() executes on the same stack as the parent process while the latter is
 * suspended, so this function should not be inlined to avoid compiler bugs
 * that might clobber data needed by the parent later.  Additionally,
 * child_exec() should not be inlined to avoid spurious -Wclobber warnings from
 * GCC (see bpo-35823).
 */
Py_NO_INLINE static pid_t
do_fork_exec(char *const exec_array[],
             char *const argv[],
             char *const envp[],
             const char *cwd,
             int p2cread, int p2cwrite,
             int c2pread, int c2pwrite,
             int errread, int errwrite,
             int errpipe_read, int errpipe_write,
             int close_fds, int restore_signals,
             int call_setsid, pid_t pgid_to_set,
             int call_setgid, gid_t gid,
             int call_setgroups, size_t groups_size, const gid_t *groups,
             int call_setuid, uid_t uid, int child_umask,
             const void *child_sigmask,
             int *fds_to_keep, Py_ssize_t fds_to_keep_len,
             PyObject *preexec_fn,
             PyObject *preexec_fn_args_tuple)
{

    pid_t pid;

#ifdef VFORK_USABLE
    if (child_sigmask) {
        /* These are checked by our caller; verify them in debug builds. */
        assert(!call_setuid);
        assert(!call_setgid);
        assert(!call_setgroups);
        assert(preexec_fn == Py_None);

        pid = vfork();
        if (pid == -1) {
            /* If vfork() fails, fall back to using fork().  When it isn't
             * allowed in a process by the kernel, vfork can return -1
             * with errno EINVAL.  https://bugs.python.org/issue47151. */
            pid = fork();
        }
    } else
#endif
    {
        pid = fork();
    }

    if (pid != 0) {
        return pid;
    }

    /* Child process.
     * See the comment above child_exec() for restrictions imposed on
     * the code below.
     */

    if (preexec_fn != Py_None) {
        /* We'll be calling back into Python later so we need to do this.
         * This call may not be async-signal-safe but neither is calling
         * back into Python.  The user asked us to use hope as a strategy
         * to avoid deadlock... */
        PyOS_AfterFork_Child();
    }

    child_exec(exec_array, argv, envp, cwd,
               p2cread, p2cwrite, c2pread, c2pwrite,
               errread, errwrite, errpipe_read, errpipe_write,
               close_fds, restore_signals, call_setsid, pgid_to_set,
               call_setgid, gid, call_setgroups, groups_size, groups,
               call_setuid, uid, child_umask, child_sigmask,
               fds_to_keep, fds_to_keep_len,
               preexec_fn, preexec_fn_args_tuple);
    _exit(255);
    return 0;  /* Dead code to avoid a potential compiler warning. */
}


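/* Implementation of _posixsubprocess.fork_exec().  All argument parsing and
 * conversion of Python objects into plain C data (argv/envp arrays, the
 * fds_to_keep array, the group list) happens here in the parent, before
 * fork(), so that the child never has to allocate memory or touch the
 * Python C API (except for an explicitly requested preexec_fn). */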
static PyObject *
subprocess_fork_exec(PyObject *module, PyObject *args)
{
    PyObject *gc_module = NULL;
    PyObject *executable_list, *py_fds_to_keep;
    PyObject *env_list, *preexec_fn;
    PyObject *process_args, *converted_args = NULL, *fast_args = NULL;
    PyObject *preexec_fn_args_tuple = NULL;
    PyObject *groups_list;
    PyObject *uid_object, *gid_object;
    int p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite;
    int errpipe_read, errpipe_write, close_fds, restore_signals;
    int call_setsid;
    pid_t pgid_to_set = -1;
    int call_setgid = 0, call_setgroups = 0, call_setuid = 0;
    uid_t uid;
    gid_t gid, *groups = NULL;
    int child_umask;
    PyObject *cwd_obj, *cwd_obj2 = NULL;
    const char *cwd;
    pid_t pid = -1;
    int need_to_reenable_gc = 0;
    char *const *exec_array, *const *argv = NULL, *const *envp = NULL;
    Py_ssize_t arg_num, num_groups = 0;
    int need_after_fork = 0;
    int saved_errno = 0;
    int allow_vfork;
    int *c_fds_to_keep = NULL;

    if (!PyArg_ParseTuple(
            args, "OOpO!OOiiiiiiiiii" _Py_PARSE_PID "OOOiOp:fork_exec",
            &process_args, &executable_list,
            &close_fds, &PyTuple_Type, &py_fds_to_keep,
            &cwd_obj, &env_list,
            &p2cread, &p2cwrite, &c2pread, &c2pwrite,
            &errread, &errwrite, &errpipe_read, &errpipe_write,
            &restore_signals, &call_setsid, &pgid_to_set,
            &gid_object, &groups_list, &uid_object, &child_umask,
            &preexec_fn, &allow_vfork))
        return NULL;

    if ((preexec_fn != Py_None) &&
        (PyInterpreterState_Get() != PyInterpreterState_Main())) {
        PyErr_SetString(PyExc_RuntimeError,
                        "preexec_fn not supported within subinterpreters");
        return NULL;
    }

    if (close_fds && errpipe_write < 3) {  /* precondition */
        PyErr_SetString(PyExc_ValueError, "errpipe_write must be >= 3");
        return NULL;
    }
    if (_sanity_check_python_fd_sequence(py_fds_to_keep)) {
        PyErr_SetString(PyExc_ValueError, "bad value(s) in fds_to_keep");
        return NULL;
    }

    PyInterpreterState *interp = PyInterpreterState_Get();
    const PyConfig *config = _PyInterpreterState_GetConfig(interp);
    if (config->_isolated_interpreter) {
        PyErr_SetString(PyExc_RuntimeError,
                        "subprocess not supported for isolated subinterpreters");
        return NULL;
    }

    /* We need to disable gc while we'll be calling preexec_fn. */
    if (preexec_fn != Py_None) {
        need_to_reenable_gc = PyGC_Disable();
    }

    exec_array = _PySequence_BytesToCharpArray(executable_list);
    if (!exec_array)
        goto cleanup;

    /* Convert args and env into appropriate arguments for exec() */
    /* These conversions are done in the parent process to avoid allocating
       or freeing memory in the child process. */
    if (process_args != Py_None) {
        Py_ssize_t num_args;
        /* Equivalent to:  */
        /* tuple(PyUnicode_FSConverter(arg) for arg in process_args) */
        fast_args = PySequence_Fast(process_args, "argv must be a tuple");
        if (fast_args == NULL)
            goto cleanup;
        num_args = PySequence_Fast_GET_SIZE(fast_args);
        converted_args = PyTuple_New(num_args);
        if (converted_args == NULL)
            goto cleanup;
        for (arg_num = 0; arg_num < num_args; ++arg_num) {
            PyObject *borrowed_arg, *converted_arg;
            if (PySequence_Fast_GET_SIZE(fast_args) != num_args) {
                PyErr_SetString(PyExc_RuntimeError, "args changed during iteration");
                goto cleanup;
            }
            borrowed_arg = PySequence_Fast_GET_ITEM(fast_args, arg_num);
            if (PyUnicode_FSConverter(borrowed_arg, &converted_arg) == 0)
                goto cleanup;
            PyTuple_SET_ITEM(converted_args, arg_num, converted_arg);
        }

        argv = _PySequence_BytesToCharpArray(converted_args);
        Py_CLEAR(converted_args);
        Py_CLEAR(fast_args);
        if (!argv)
            goto cleanup;
    }

    if (env_list != Py_None) {
        envp = _PySequence_BytesToCharpArray(env_list);
        if (!envp)
            goto cleanup;
    }

    if (cwd_obj != Py_None) {
        if (PyUnicode_FSConverter(cwd_obj, &cwd_obj2) == 0)
            goto cleanup;
        cwd = PyBytes_AsString(cwd_obj2);
    } else {
        cwd = NULL;
    }

    if (groups_list != Py_None) {
#ifdef HAVE_SETGROUPS
        Py_ssize_t i;
        gid_t gid;

        if (!PyList_Check(groups_list)) {
            PyErr_SetString(PyExc_TypeError,
                            "setgroups argument must be a list");
            goto cleanup;
        }
        num_groups = PySequence_Size(groups_list);

        if (num_groups < 0)
            goto cleanup;

        if (num_groups > MAX_GROUPS) {
            PyErr_SetString(PyExc_ValueError, "too many groups");
            goto cleanup;
        }

        if ((groups = PyMem_RawMalloc(num_groups * sizeof(gid_t))) == NULL) {
            PyErr_SetString(PyExc_MemoryError,
                            "failed to allocate memory for group list");
            goto cleanup;
        }

        for (i = 0; i < num_groups; i++) {
            PyObject *elem;
            elem = PySequence_GetItem(groups_list, i);
            if (!elem)
                goto cleanup;
            if (!PyLong_Check(elem)) {
                PyErr_SetString(PyExc_TypeError,
                                "groups must be integers");
                Py_DECREF(elem);
                goto cleanup;
            } else {
                if (!_Py_Gid_Converter(elem, &gid)) {
                    Py_DECREF(elem);
                    PyErr_SetString(PyExc_ValueError, "invalid group id");
                    goto cleanup;
                }
                groups[i] = gid;
            }
            Py_DECREF(elem);
        }
        call_setgroups = 1;

#else /* HAVE_SETGROUPS */
        PyErr_BadInternalCall();
        goto cleanup;
#endif /* HAVE_SETGROUPS */
    }

    if (gid_object != Py_None) {
#ifdef HAVE_SETREGID
        if (!_Py_Gid_Converter(gid_object, &gid))
            goto cleanup;

        call_setgid = 1;

#else /* HAVE_SETREGID */
        PyErr_BadInternalCall();
        goto cleanup;
#endif /* HAVE_SETREGID */
    }

    if (uid_object != Py_None) {
#ifdef HAVE_SETREUID
        if (!_Py_Uid_Converter(uid_object, &uid))
            goto cleanup;

        call_setuid = 1;

#else /* HAVE_SETREUID */
        PyErr_BadInternalCall();
        goto cleanup;
#endif /* HAVE_SETREUID */
    }

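    /* Materialize py_fds_to_keep as a plain C int array now, while it is
     * still safe to allocate; after fork() the child only reads
     * c_fds_to_keep and never touches the Python tuple. */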
    Py_ssize_t fds_to_keep_len = PyTuple_GET_SIZE(py_fds_to_keep);
    c_fds_to_keep = PyMem_Malloc(fds_to_keep_len * sizeof(int));
    if (c_fds_to_keep == NULL) {
        PyErr_SetString(PyExc_MemoryError, "failed to malloc c_fds_to_keep");
        goto cleanup;
    }
    if (convert_fds_to_keep_to_c(py_fds_to_keep, c_fds_to_keep) < 0) {
        goto cleanup;
    }

    /* This must be the last thing done before fork() because we do not
     * want to call PyOS_BeforeFork() if there is any chance of another
     * error leading to the cleanup: code without calling fork(). */
    if (preexec_fn != Py_None) {
        preexec_fn_args_tuple = PyTuple_New(0);
        if (!preexec_fn_args_tuple)
            goto cleanup;
        PyOS_BeforeFork();
        need_after_fork = 1;
    }

    /* NOTE: When old_sigmask is non-NULL, do_fork_exec() may use vfork(). */
    const void *old_sigmask = NULL;
#ifdef VFORK_USABLE
    /* Use vfork() only if it's safe.  See the comment above child_exec(). */
    sigset_t old_sigs;
    if (preexec_fn == Py_None && allow_vfork &&
        !call_setuid && !call_setgid && !call_setgroups) {
        /* Block all signals to ensure that no signal handlers are run in the
         * child process while it shares memory with us.  Note that signals
         * used internally by C libraries won't be blocked by
         * pthread_sigmask(), but signal handlers installed by C libraries
         * normally service only signals originating from *within the process*,
         * so it should be sufficient to consider any library function that
         * might send such a signal to be vfork-unsafe and do not call it in
         * the child.
         */
        sigset_t all_sigs;
        sigfillset(&all_sigs);
        if ((saved_errno = pthread_sigmask(SIG_BLOCK, &all_sigs, &old_sigs))) {
            goto cleanup;
        }
        old_sigmask = &old_sigs;
    }
#endif

    pid = do_fork_exec(exec_array, argv, envp, cwd,
                       p2cread, p2cwrite, c2pread, c2pwrite,
                       errread, errwrite, errpipe_read, errpipe_write,
                       close_fds, restore_signals, call_setsid, pgid_to_set,
                       call_setgid, gid, call_setgroups, num_groups, groups,
                       call_setuid, uid, child_umask, old_sigmask,
                       c_fds_to_keep, fds_to_keep_len,
                       preexec_fn, preexec_fn_args_tuple);

    /* Parent (original) process */
    if (pid == -1) {
        /* Capture errno for the exception. */
        saved_errno = errno;
    }

#ifdef VFORK_USABLE
    if (old_sigmask) {
        /* vfork() semantics guarantees that the parent is blocked
         * until the child performs _exit() or execve(), so it is safe
         * to unblock signals once we're here.
         * Note that in environments where vfork() is implemented as fork(),
         * such as QEMU user-mode emulation, the parent won't be blocked,
         * but it won't share the address space with the child,
         * so it's still safe to unblock the signals.
         *
         * We don't handle errors here because this call can't fail
         * if valid arguments are given, and because there is no good
         * way for the caller to deal with a failure to restore
         * the thread signal mask. */
        (void) pthread_sigmask(SIG_SETMASK, old_sigmask, NULL);
    }
#endif

    if (need_after_fork)
        PyOS_AfterFork_Parent();

cleanup:
    if (c_fds_to_keep != NULL) {
        PyMem_Free(c_fds_to_keep);
    }

    if (saved_errno != 0) {
        errno = saved_errno;
        /* We can't call this above as PyOS_AfterFork_Parent() calls back
         * into Python code which would see the unreturned error. */
        PyErr_SetFromErrno(PyExc_OSError);
    }

    Py_XDECREF(preexec_fn_args_tuple);
    PyMem_RawFree(groups);
    Py_XDECREF(cwd_obj2);
    if (envp)
        _Py_FreeCharPArray(envp);
    Py_XDECREF(converted_args);
    Py_XDECREF(fast_args);
    if (argv)
        _Py_FreeCharPArray(argv);
    if (exec_array)
        _Py_FreeCharPArray(exec_array);

    if (need_to_reenable_gc) {
        PyGC_Enable();
    }
    Py_XDECREF(gc_module);

    return pid == -1 ? NULL : PyLong_FromPid(pid);
}


PyDoc_STRVAR(subprocess_fork_exec_doc,
"fork_exec(args, executable_list, close_fds, pass_fds, cwd, env,\n\
          p2cread, p2cwrite, c2pread, c2pwrite,\n\
          errread, errwrite, errpipe_read, errpipe_write,\n\
          restore_signals, call_setsid, pgid_to_set,\n\
          gid, groups_list, uid, child_umask,\n\
          preexec_fn, allow_vfork)\n\
\n\
Forks a child process, closes parent file descriptors as appropriate in the\n\
child and dups the few that are needed before calling exec() in the child\n\
process.\n\
\n\
If close_fds is true, close file descriptors 3 and higher, except those listed\n\
in the sorted tuple pass_fds.\n\
\n\
The preexec_fn, if supplied, will be called immediately before closing file\n\
descriptors and exec.\n\
WARNING: preexec_fn is NOT SAFE if your application uses threads.\n\
         It may trigger infrequent, difficult to debug deadlocks.\n\
\n\
If an error occurs in the child process before the exec, it is\n\
serialized and written to the errpipe_write fd per subprocess.py.\n\
\n\
Returns: the child process's PID.\n\
\n\
Raises: Only on an error in the parent process.\n\
");

/* module level code ********************************************************/

PyDoc_STRVAR(module_doc,
"A POSIX helper for the subprocess module.");

static PyMethodDef module_methods[] = {
    {"fork_exec", subprocess_fork_exec, METH_VARARGS, subprocess_fork_exec_doc},
    {NULL, NULL}  /* sentinel */
};

static PyModuleDef_Slot _posixsubprocess_slots[] = {
    {0, NULL}
};

static struct PyModuleDef _posixsubprocessmodule = {
    PyModuleDef_HEAD_INIT,
    .m_name = "_posixsubprocess",
    .m_doc = module_doc,
    .m_size = 0,
    .m_methods = module_methods,
    .m_slots = _posixsubprocess_slots,
};

PyMODINIT_FUNC
PyInit__posixsubprocess(void)
{
    return PyModuleDef_Init(&_posixsubprocessmodule);
}