1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//go:build unix
6
7// Fork, exec, wait, etc.
8
9package syscall
10
11import (
12	errorspkg "errors"
13	"internal/bytealg"
14	"runtime"
15	"sync"
16	"unsafe"
17)
18
// ForkLock is used to synchronize creation of new file descriptors
// with fork.
//
// We want the child in a fork/exec sequence to inherit only the
// file descriptors we intend. To do that, we mark all file
// descriptors close-on-exec and then, in the child, explicitly
// unmark the ones we want the exec'ed program to keep.
// Unix doesn't make this easy: there is, in general, no way to
// allocate a new file descriptor close-on-exec. Instead you
// have to allocate the descriptor and then mark it close-on-exec.
// If a fork happens between those two events, the child's exec
// will inherit an unwanted file descriptor.
//
// This lock solves that race: the two-step "create a new fd, then
// mark it close-on-exec" operation is done holding ForkLock for
// reading, and the fork itself is done holding ForkLock for writing.
// At least, that's the idea. There are some complications.
//
// Some system calls that create new file descriptors can block
// for arbitrarily long times: open on a hung NFS server or named
// pipe, accept on a socket, and so on. We can't reasonably grab
// the lock across those operations.
//
// It is worse to inherit some file descriptors than others.
// If a non-malicious child accidentally inherits an open ordinary file,
// that's not a big deal. On the other hand, if a long-lived child
// accidentally inherits the write end of a pipe, then the reader
// of that pipe will not see EOF until that child exits, potentially
// causing the parent program to hang. This is a common problem
// in threaded C programs that use popen.
//
// Luckily, the file descriptors that are most important not to
// inherit are not the ones that can take an arbitrarily long time
// to create: pipe returns instantly, and the net package uses
// non-blocking I/O to accept on a listening socket.
//
// The rules for which file descriptor-creating operations use the
// ForkLock are as follows:
//
//   - [Pipe]. Use pipe2 if available. Otherwise, does not block,
//     so use ForkLock.
//   - [Socket]. Use SOCK_CLOEXEC if available. Otherwise, does not
//     block, so use ForkLock.
//   - [Open]. Use [O_CLOEXEC] if available. Otherwise, may block,
//     so live with the race.
//   - [Dup]. Use [F_DUPFD_CLOEXEC] or dup3 if available. Otherwise,
//     does not block, so use ForkLock.
var ForkLock sync.RWMutex
66
67// StringSlicePtr converts a slice of strings to a slice of pointers
68// to NUL-terminated byte arrays. If any string contains a NUL byte
69// this function panics instead of returning an error.
70//
71// Deprecated: Use [SlicePtrFromStrings] instead.
72func StringSlicePtr(ss []string) []*byte {
73	bb := make([]*byte, len(ss)+1)
74	for i := 0; i < len(ss); i++ {
75		bb[i] = StringBytePtr(ss[i])
76	}
77	bb[len(ss)] = nil
78	return bb
79}
80
81// SlicePtrFromStrings converts a slice of strings to a slice of
82// pointers to NUL-terminated byte arrays. If any string contains
83// a NUL byte, it returns (nil, [EINVAL]).
84func SlicePtrFromStrings(ss []string) ([]*byte, error) {
85	n := 0
86	for _, s := range ss {
87		if bytealg.IndexByteString(s, 0) != -1 {
88			return nil, EINVAL
89		}
90		n += len(s) + 1 // +1 for NUL
91	}
92	bb := make([]*byte, len(ss)+1)
93	b := make([]byte, n)
94	n = 0
95	for i, s := range ss {
96		bb[i] = &b[n]
97		copy(b[n:], s)
98		n += len(s) + 1
99	}
100	return bb, nil
101}
102
103func CloseOnExec(fd int) { fcntl(fd, F_SETFD, FD_CLOEXEC) }
104
105func SetNonblock(fd int, nonblocking bool) (err error) {
106	flag, err := fcntl(fd, F_GETFL, 0)
107	if err != nil {
108		return err
109	}
110	if (flag&O_NONBLOCK != 0) == nonblocking {
111		return nil
112	}
113	if nonblocking {
114		flag |= O_NONBLOCK
115	} else {
116		flag &^= O_NONBLOCK
117	}
118	_, err = fcntl(fd, F_SETFL, flag)
119	return err
120}
121
// Credential holds user and group identities to be assumed
// by a child process started by [StartProcess].
type Credential struct {
	Uid         uint32   // User ID.
	Gid         uint32   // Group ID.
	Groups      []uint32 // Supplementary group IDs.
	NoSetGroups bool     // If true, don't set supplementary groups.
}
130
// ProcAttr holds attributes that will be applied to a new process started
// by [StartProcess].
type ProcAttr struct {
	Dir   string    // Current working directory.
	Env   []string  // Environment.
	Files []uintptr // File descriptors.
	// Sys holds the system-specific attributes. A nil Sys is treated
	// as a zero-valued SysProcAttr when the process is started.
	Sys   *SysProcAttr
}
139
140var zeroProcAttr ProcAttr
141var zeroSysProcAttr SysProcAttr
142
// forkExec starts a child process that executes the program argv0 with
// argument list argv, with attr supplying the working directory,
// environment and remaining settings. It is the shared implementation
// behind ForkExec and StartProcess.
//
// A nil attr is treated as the zero ProcAttr, and a nil attr.Sys as the
// zero SysProcAttr. If argv0, argv, attr.Env, sys.Chroot or attr.Dir
// cannot be converted to C form, or the Setctty/Foreground/Ctty
// combination is rejected, forkExec returns 0 and the error before
// taking the fork lock or creating any process.
//
// The parent learns about a failed exec through a status pipe: if it
// reads a complete Errno from the pipe, that Errno is returned (after
// waiting for the child); if it reads EOF, the pipe was closed on exec
// and the child's pid is returned with a nil error.
func forkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) {
	var p [2]int        // status pipe: p[0] is read by the parent, p[1] is handed to the child
	var n int           // number of bytes read from the status pipe
	var err1 Errno      // fork error, then reused as the buffer for the child's reported errno
	var wstatus WaitStatus

	if attr == nil {
		attr = &zeroProcAttr
	}
	sys := attr.Sys
	if sys == nil {
		sys = &zeroSysProcAttr
	}

	// Convert args to C form.
	argv0p, err := BytePtrFromString(argv0)
	if err != nil {
		return 0, err
	}
	argvp, err := SlicePtrFromStrings(argv)
	if err != nil {
		return 0, err
	}
	envvp, err := SlicePtrFromStrings(attr.Env)
	if err != nil {
		return 0, err
	}

	// On FreeBSD and DragonFly, when argv[0] is longer than argv0, the
	// child is given argv0 itself as its first argument.
	// NOTE(review): the motivation for this substitution is not stated
	// in this file — confirm against the change history.
	if (runtime.GOOS == "freebsd" || runtime.GOOS == "dragonfly") && len(argv) > 0 && len(argv[0]) > len(argv0) {
		argvp[0] = argv0p
	}

	// chroot and dir stay nil when the corresponding string is empty.
	var chroot *byte
	if sys.Chroot != "" {
		chroot, err = BytePtrFromString(sys.Chroot)
		if err != nil {
			return 0, err
		}
	}
	var dir *byte
	if attr.Dir != "" {
		dir, err = BytePtrFromString(attr.Dir)
		if err != nil {
			return 0, err
		}
	}

	// Both Setctty and Foreground use the Ctty field,
	// but they give it slightly different meanings.
	if sys.Setctty && sys.Foreground {
		return 0, errorspkg.New("both Setctty and Foreground set in SysProcAttr")
	}
	// With Setctty, Ctty must be smaller than len(attr.Files);
	// presumably it names a descriptor number in the child — verify
	// against forkAndExecInChild.
	if sys.Setctty && sys.Ctty >= len(attr.Files) {
		return 0, errorspkg.New("Setctty set but Ctty not valid in child")
	}

	// The fork lock is held from pipe creation until the fork call has
	// returned; every return path below releases it exactly once.
	acquireForkLock()

	// Allocate child status pipe close on exec.
	if err = forkExecPipe(p[:]); err != nil {
		releaseForkLock()
		return 0, err
	}

	// Kick off child.
	pid, err1 = forkAndExecInChild(argv0p, argvp, envvp, chroot, dir, attr, sys, p[1])
	if err1 != 0 {
		// No child to report through the pipe: close both ends.
		Close(p[0])
		Close(p[1])
		releaseForkLock()
		return 0, Errno(err1)
	}
	releaseForkLock()

	// Read child error status from pipe.
	// The parent's copy of the write end is closed first so that the
	// read below can observe EOF.
	Close(p[1])
	for {
		// Read up to one Errno's worth of bytes directly into err1,
		// retrying while the read fails with EINTR.
		n, err = readlen(p[0], (*byte)(unsafe.Pointer(&err1)), int(unsafe.Sizeof(err1)))
		if err != EINTR {
			break
		}
	}
	Close(p[0])
	if err != nil || n != 0 {
		if n == int(unsafe.Sizeof(err1)) {
			// A complete Errno arrived: that is the child's failure reason.
			err = Errno(err1)
		}
		if err == nil {
			// Non-empty but incomplete read with no error: report EPIPE.
			err = EPIPE
		}

		// Child failed; wait for it to exit, to make sure
		// the zombies don't accumulate.
		// Note: := declares a new err1 scoped to this block, shadowing
		// the Errno variable above; the Wait4 result is only used to
		// retry on EINTR.
		_, err1 := Wait4(pid, &wstatus, 0, nil)
		for err1 == EINTR {
			_, err1 = Wait4(pid, &wstatus, 0, nil)
		}

		// OS-specific cleanup on failure.
		forkAndExecFailureCleanup(attr, sys)

		return 0, err
	}

	// Read got EOF, so pipe closed on exec, so exec succeeded.
	return pid, nil
}
250
251// Combination of fork and exec, careful to be thread safe.
252func ForkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) {
253	return forkExec(argv0, argv, attr)
254}
255
256// StartProcess wraps [ForkExec] for package os.
257func StartProcess(argv0 string, argv []string, attr *ProcAttr) (pid int, handle uintptr, err error) {
258	pid, err = forkExec(argv0, argv, attr)
259	return pid, 0, err
260}
261
// Implemented in runtime package.
//
// Exec calls runtime_BeforeExec just before it attempts the exec and
// runtime_AfterExec once the exec attempt has returned.
func runtime_BeforeExec()
func runtime_AfterExec()
265
// execveLibc is non-nil on OS using libc syscall, set to execve in exec_libc.go; this
// avoids a build dependency for other platforms.
// Exec calls it when GOOS is solaris, illumos, or aix.
var execveLibc func(path uintptr, argv uintptr, envp uintptr) Errno

// execveDarwin is called by Exec when GOOS is darwin or ios.
// NOTE(review): presumably assigned in a Darwin-only file, in the same
// way as execveLibc — confirm.
var execveDarwin func(path *byte, argv **byte, envp **byte) error

// execveOpenBSD is called by Exec when GOOS is openbsd and GOARCH is
// not mips64.
// NOTE(review): presumably assigned in an OpenBSD-only file, in the
// same way as execveLibc — confirm.
var execveOpenBSD func(path *byte, argv **byte, envp **byte) error
271
272// Exec invokes the execve(2) system call.
273func Exec(argv0 string, argv []string, envv []string) (err error) {
274	argv0p, err := BytePtrFromString(argv0)
275	if err != nil {
276		return err
277	}
278	argvp, err := SlicePtrFromStrings(argv)
279	if err != nil {
280		return err
281	}
282	envvp, err := SlicePtrFromStrings(envv)
283	if err != nil {
284		return err
285	}
286	runtime_BeforeExec()
287
288	rlim := origRlimitNofile.Load()
289	if rlim != nil {
290		Setrlimit(RLIMIT_NOFILE, rlim)
291	}
292
293	var err1 error
294	if runtime.GOOS == "solaris" || runtime.GOOS == "illumos" || runtime.GOOS == "aix" {
295		// RawSyscall should never be used on Solaris, illumos, or AIX.
296		err1 = execveLibc(
297			uintptr(unsafe.Pointer(argv0p)),
298			uintptr(unsafe.Pointer(&argvp[0])),
299			uintptr(unsafe.Pointer(&envvp[0])))
300	} else if runtime.GOOS == "darwin" || runtime.GOOS == "ios" {
301		// Similarly on Darwin.
302		err1 = execveDarwin(argv0p, &argvp[0], &envvp[0])
303	} else if runtime.GOOS == "openbsd" && runtime.GOARCH != "mips64" {
304		// Similarly on OpenBSD.
305		err1 = execveOpenBSD(argv0p, &argvp[0], &envvp[0])
306	} else {
307		_, _, err1 = RawSyscall(SYS_EXECVE,
308			uintptr(unsafe.Pointer(argv0p)),
309			uintptr(unsafe.Pointer(&argvp[0])),
310			uintptr(unsafe.Pointer(&envvp[0])))
311	}
312	runtime_AfterExec()
313	return err1
314}
315