1// Copyright 2009 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5//go:build unix 6 7// Fork, exec, wait, etc. 8 9package syscall 10 11import ( 12 errorspkg "errors" 13 "internal/bytealg" 14 "runtime" 15 "sync" 16 "unsafe" 17) 18 19// ForkLock is used to synchronize creation of new file descriptors 20// with fork. 21// 22// We want the child in a fork/exec sequence to inherit only the 23// file descriptors we intend. To do that, we mark all file 24// descriptors close-on-exec and then, in the child, explicitly 25// unmark the ones we want the exec'ed program to keep. 26// Unix doesn't make this easy: there is, in general, no way to 27// allocate a new file descriptor close-on-exec. Instead you 28// have to allocate the descriptor and then mark it close-on-exec. 29// If a fork happens between those two events, the child's exec 30// will inherit an unwanted file descriptor. 31// 32// This lock solves that race: the create new fd/mark close-on-exec 33// operation is done holding ForkLock for reading, and the fork itself 34// is done holding ForkLock for writing. At least, that's the idea. 35// There are some complications. 36// 37// Some system calls that create new file descriptors can block 38// for arbitrarily long times: open on a hung NFS server or named 39// pipe, accept on a socket, and so on. We can't reasonably grab 40// the lock across those operations. 41// 42// It is worse to inherit some file descriptors than others. 43// If a non-malicious child accidentally inherits an open ordinary file, 44// that's not a big deal. On the other hand, if a long-lived child 45// accidentally inherits the write end of a pipe, then the reader 46// of that pipe will not see EOF until that child exits, potentially 47// causing the parent program to hang. This is a common problem 48// in threaded C programs that use popen. 49// 50// Luckily, the file descriptors that are most important not to 51// inherit are not the ones that can take an arbitrarily long time 52// to create: pipe returns instantly, and the net package uses 53// non-blocking I/O to accept on a listening socket. 54// The rules for which file descriptor-creating operations use the 55// ForkLock are as follows: 56// 57// - [Pipe]. Use pipe2 if available. Otherwise, does not block, 58// so use ForkLock. 59// - [Socket]. Use SOCK_CLOEXEC if available. Otherwise, does not 60// block, so use ForkLock. 61// - [Open]. Use [O_CLOEXEC] if available. Otherwise, may block, 62// so live with the race. 63// - [Dup]. Use [F_DUPFD_CLOEXEC] or dup3 if available. Otherwise, 64// does not block, so use ForkLock. 65var ForkLock sync.RWMutex 66 67// StringSlicePtr converts a slice of strings to a slice of pointers 68// to NUL-terminated byte arrays. If any string contains a NUL byte 69// this function panics instead of returning an error. 70// 71// Deprecated: Use [SlicePtrFromStrings] instead. 72func StringSlicePtr(ss []string) []*byte { 73 bb := make([]*byte, len(ss)+1) 74 for i := 0; i < len(ss); i++ { 75 bb[i] = StringBytePtr(ss[i]) 76 } 77 bb[len(ss)] = nil 78 return bb 79} 80 81// SlicePtrFromStrings converts a slice of strings to a slice of 82// pointers to NUL-terminated byte arrays. If any string contains 83// a NUL byte, it returns (nil, [EINVAL]). 84func SlicePtrFromStrings(ss []string) ([]*byte, error) { 85 n := 0 86 for _, s := range ss { 87 if bytealg.IndexByteString(s, 0) != -1 { 88 return nil, EINVAL 89 } 90 n += len(s) + 1 // +1 for NUL 91 } 92 bb := make([]*byte, len(ss)+1) 93 b := make([]byte, n) 94 n = 0 95 for i, s := range ss { 96 bb[i] = &b[n] 97 copy(b[n:], s) 98 n += len(s) + 1 99 } 100 return bb, nil 101} 102 103func CloseOnExec(fd int) { fcntl(fd, F_SETFD, FD_CLOEXEC) } 104 105func SetNonblock(fd int, nonblocking bool) (err error) { 106 flag, err := fcntl(fd, F_GETFL, 0) 107 if err != nil { 108 return err 109 } 110 if (flag&O_NONBLOCK != 0) == nonblocking { 111 return nil 112 } 113 if nonblocking { 114 flag |= O_NONBLOCK 115 } else { 116 flag &^= O_NONBLOCK 117 } 118 _, err = fcntl(fd, F_SETFL, flag) 119 return err 120} 121 122// Credential holds user and group identities to be assumed 123// by a child process started by [StartProcess]. 124type Credential struct { 125 Uid uint32 // User ID. 126 Gid uint32 // Group ID. 127 Groups []uint32 // Supplementary group IDs. 128 NoSetGroups bool // If true, don't set supplementary groups 129} 130 131// ProcAttr holds attributes that will be applied to a new process started 132// by [StartProcess]. 133type ProcAttr struct { 134 Dir string // Current working directory. 135 Env []string // Environment. 136 Files []uintptr // File descriptors. 137 Sys *SysProcAttr 138} 139 140var zeroProcAttr ProcAttr 141var zeroSysProcAttr SysProcAttr 142 143func forkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) { 144 var p [2]int 145 var n int 146 var err1 Errno 147 var wstatus WaitStatus 148 149 if attr == nil { 150 attr = &zeroProcAttr 151 } 152 sys := attr.Sys 153 if sys == nil { 154 sys = &zeroSysProcAttr 155 } 156 157 // Convert args to C form. 158 argv0p, err := BytePtrFromString(argv0) 159 if err != nil { 160 return 0, err 161 } 162 argvp, err := SlicePtrFromStrings(argv) 163 if err != nil { 164 return 0, err 165 } 166 envvp, err := SlicePtrFromStrings(attr.Env) 167 if err != nil { 168 return 0, err 169 } 170 171 if (runtime.GOOS == "freebsd" || runtime.GOOS == "dragonfly") && len(argv) > 0 && len(argv[0]) > len(argv0) { 172 argvp[0] = argv0p 173 } 174 175 var chroot *byte 176 if sys.Chroot != "" { 177 chroot, err = BytePtrFromString(sys.Chroot) 178 if err != nil { 179 return 0, err 180 } 181 } 182 var dir *byte 183 if attr.Dir != "" { 184 dir, err = BytePtrFromString(attr.Dir) 185 if err != nil { 186 return 0, err 187 } 188 } 189 190 // Both Setctty and Foreground use the Ctty field, 191 // but they give it slightly different meanings. 192 if sys.Setctty && sys.Foreground { 193 return 0, errorspkg.New("both Setctty and Foreground set in SysProcAttr") 194 } 195 if sys.Setctty && sys.Ctty >= len(attr.Files) { 196 return 0, errorspkg.New("Setctty set but Ctty not valid in child") 197 } 198 199 acquireForkLock() 200 201 // Allocate child status pipe close on exec. 202 if err = forkExecPipe(p[:]); err != nil { 203 releaseForkLock() 204 return 0, err 205 } 206 207 // Kick off child. 208 pid, err1 = forkAndExecInChild(argv0p, argvp, envvp, chroot, dir, attr, sys, p[1]) 209 if err1 != 0 { 210 Close(p[0]) 211 Close(p[1]) 212 releaseForkLock() 213 return 0, Errno(err1) 214 } 215 releaseForkLock() 216 217 // Read child error status from pipe. 218 Close(p[1]) 219 for { 220 n, err = readlen(p[0], (*byte)(unsafe.Pointer(&err1)), int(unsafe.Sizeof(err1))) 221 if err != EINTR { 222 break 223 } 224 } 225 Close(p[0]) 226 if err != nil || n != 0 { 227 if n == int(unsafe.Sizeof(err1)) { 228 err = Errno(err1) 229 } 230 if err == nil { 231 err = EPIPE 232 } 233 234 // Child failed; wait for it to exit, to make sure 235 // the zombies don't accumulate. 236 _, err1 := Wait4(pid, &wstatus, 0, nil) 237 for err1 == EINTR { 238 _, err1 = Wait4(pid, &wstatus, 0, nil) 239 } 240 241 // OS-specific cleanup on failure. 242 forkAndExecFailureCleanup(attr, sys) 243 244 return 0, err 245 } 246 247 // Read got EOF, so pipe closed on exec, so exec succeeded. 248 return pid, nil 249} 250 251// Combination of fork and exec, careful to be thread safe. 252func ForkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) { 253 return forkExec(argv0, argv, attr) 254} 255 256// StartProcess wraps [ForkExec] for package os. 257func StartProcess(argv0 string, argv []string, attr *ProcAttr) (pid int, handle uintptr, err error) { 258 pid, err = forkExec(argv0, argv, attr) 259 return pid, 0, err 260} 261 262// Implemented in runtime package. 263func runtime_BeforeExec() 264func runtime_AfterExec() 265 266// execveLibc is non-nil on OS using libc syscall, set to execve in exec_libc.go; this 267// avoids a build dependency for other platforms. 268var execveLibc func(path uintptr, argv uintptr, envp uintptr) Errno 269var execveDarwin func(path *byte, argv **byte, envp **byte) error 270var execveOpenBSD func(path *byte, argv **byte, envp **byte) error 271 272// Exec invokes the execve(2) system call. 273func Exec(argv0 string, argv []string, envv []string) (err error) { 274 argv0p, err := BytePtrFromString(argv0) 275 if err != nil { 276 return err 277 } 278 argvp, err := SlicePtrFromStrings(argv) 279 if err != nil { 280 return err 281 } 282 envvp, err := SlicePtrFromStrings(envv) 283 if err != nil { 284 return err 285 } 286 runtime_BeforeExec() 287 288 rlim := origRlimitNofile.Load() 289 if rlim != nil { 290 Setrlimit(RLIMIT_NOFILE, rlim) 291 } 292 293 var err1 error 294 if runtime.GOOS == "solaris" || runtime.GOOS == "illumos" || runtime.GOOS == "aix" { 295 // RawSyscall should never be used on Solaris, illumos, or AIX. 296 err1 = execveLibc( 297 uintptr(unsafe.Pointer(argv0p)), 298 uintptr(unsafe.Pointer(&argvp[0])), 299 uintptr(unsafe.Pointer(&envvp[0]))) 300 } else if runtime.GOOS == "darwin" || runtime.GOOS == "ios" { 301 // Similarly on Darwin. 302 err1 = execveDarwin(argv0p, &argvp[0], &envvp[0]) 303 } else if runtime.GOOS == "openbsd" && runtime.GOARCH != "mips64" { 304 // Similarly on OpenBSD. 305 err1 = execveOpenBSD(argv0p, &argvp[0], &envvp[0]) 306 } else { 307 _, _, err1 = RawSyscall(SYS_EXECVE, 308 uintptr(unsafe.Pointer(argv0p)), 309 uintptr(unsafe.Pointer(&argvp[0])), 310 uintptr(unsafe.Pointer(&envvp[0]))) 311 } 312 runtime_AfterExec() 313 return err1 314} 315