1// Copyright 2023 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Support for pidfd was added during the course of a few Linux releases:
6//  v5.1: pidfd_send_signal syscall;
7//  v5.2: CLONE_PIDFD flag for clone syscall;
8//  v5.3: pidfd_open syscall, clone3 syscall;
9//  v5.4: P_PIDFD idtype support for waitid syscall;
10//  v5.6: pidfd_getfd syscall.
11
12package os
13
14import (
15	"errors"
16	"internal/syscall/unix"
17	"sync"
18	"syscall"
19	"unsafe"
20)
21
22// ensurePidfd initializes the PidFD field in sysAttr if it is not already set.
23// It returns the original or modified SysProcAttr struct and a flag indicating
24// whether the PidFD should be duplicated before using.
25func ensurePidfd(sysAttr *syscall.SysProcAttr) (*syscall.SysProcAttr, bool) {
26	if !pidfdWorks() {
27		return sysAttr, false
28	}
29
30	var pidfd int
31
32	if sysAttr == nil {
33		return &syscall.SysProcAttr{
34			PidFD: &pidfd,
35		}, false
36	}
37	if sysAttr.PidFD == nil {
38		newSys := *sysAttr // copy
39		newSys.PidFD = &pidfd
40		return &newSys, false
41	}
42
43	return sysAttr, true
44}
45
46// getPidfd returns the value of sysAttr.PidFD (or its duplicate if needDup is
47// set) and a flag indicating whether the value can be used.
48func getPidfd(sysAttr *syscall.SysProcAttr, needDup bool) (uintptr, bool) {
49	if !pidfdWorks() {
50		return 0, false
51	}
52
53	h := *sysAttr.PidFD
54	if needDup {
55		dupH, e := unix.Fcntl(h, syscall.F_DUPFD_CLOEXEC, 0)
56		if e != nil {
57			return 0, false
58		}
59		h = dupH
60	}
61	return uintptr(h), true
62}
63
64func pidfdFind(pid int) (uintptr, error) {
65	if !pidfdWorks() {
66		return 0, syscall.ENOSYS
67	}
68
69	h, err := unix.PidFDOpen(pid, 0)
70	if err != nil {
71		return 0, convertESRCH(err)
72	}
73	return h, nil
74}
75
// _P_PIDFD is the idtype argument passed to the waitid syscall to indicate
// that the id argument is a pidfd (Linux P_PIDFD, supported since v5.4).
const _P_PIDFD = 3
78
79func (p *Process) pidfdWait() (*ProcessState, error) {
80	// When pidfd is used, there is no wait/kill race (described in CL 23967)
81	// because the PID recycle issue doesn't exist (IOW, pidfd, unlike PID,
82	// is guaranteed to refer to one particular process). Thus, there is no
83	// need for the workaround (blockUntilWaitable + sigMu) from pidWait.
84	//
85	// We _do_ need to be careful about reuse of the pidfd FD number when
86	// closing the pidfd. See handle for more details.
87	handle, status := p.handleTransientAcquire()
88	switch status {
89	case statusDone:
90		// Process already completed Wait, or was not found by
91		// pidfdFind. Return ECHILD for consistency with what the wait
92		// syscall would return.
93		return nil, NewSyscallError("wait", syscall.ECHILD)
94	case statusReleased:
95		return nil, syscall.EINVAL
96	}
97	defer p.handleTransientRelease()
98
99	var (
100		info   unix.SiginfoChild
101		rusage syscall.Rusage
102		e      syscall.Errno
103	)
104	for {
105		_, _, e = syscall.Syscall6(syscall.SYS_WAITID, _P_PIDFD, handle, uintptr(unsafe.Pointer(&info)), syscall.WEXITED, uintptr(unsafe.Pointer(&rusage)), 0)
106		if e != syscall.EINTR {
107			break
108		}
109	}
110	if e != 0 {
111		return nil, NewSyscallError("waitid", e)
112	}
113	// Release the Process' handle reference, in addition to the reference
114	// we took above.
115	p.handlePersistentRelease(statusDone)
116	return &ProcessState{
117		pid:    int(info.Pid),
118		status: info.WaitStatus(),
119		rusage: &rusage,
120	}, nil
121}
122
123func (p *Process) pidfdSendSignal(s syscall.Signal) error {
124	handle, status := p.handleTransientAcquire()
125	switch status {
126	case statusDone:
127		return ErrProcessDone
128	case statusReleased:
129		return errors.New("os: process already released")
130	}
131	defer p.handleTransientRelease()
132
133	return convertESRCH(unix.PidFDSendSignal(handle, s))
134}
135
136func pidfdWorks() bool {
137	return checkPidfdOnce() == nil
138}
139
// checkPidfdOnce wraps checkPidfd with sync.OnceValue, so the check runs at
// most once and every call returns the result of that single run.
var checkPidfdOnce = sync.OnceValue(checkPidfd)
141
142// checkPidfd checks whether all required pidfd-related syscalls work.
143// This consists of pidfd_open and pidfd_send_signal syscalls, and waitid
144// syscall with idtype of P_PIDFD.
145//
146// Reasons for non-working pidfd syscalls include an older kernel and an
147// execution environment in which the above system calls are restricted by
148// seccomp or a similar technology.
149func checkPidfd() error {
150	// Get a pidfd of the current process (opening of "/proc/self" won't
151	// work for waitid).
152	fd, err := unix.PidFDOpen(syscall.Getpid(), 0)
153	if err != nil {
154		return NewSyscallError("pidfd_open", err)
155	}
156	defer syscall.Close(int(fd))
157
158	// Check waitid(P_PIDFD) works.
159	for {
160		_, _, err = syscall.Syscall6(syscall.SYS_WAITID, _P_PIDFD, fd, 0, syscall.WEXITED, 0, 0)
161		if err != syscall.EINTR {
162			break
163		}
164	}
165	// Expect ECHILD from waitid since we're not our own parent.
166	if err != syscall.ECHILD {
167		return NewSyscallError("pidfd_wait", err)
168	}
169
170	// Check pidfd_send_signal works (should be able to send 0 to itself).
171	if err := unix.PidFDSendSignal(fd, 0); err != nil {
172		return NewSyscallError("pidfd_send_signal", err)
173	}
174
175	return nil
176}
177