1 /*
2 * Copyright (C) 2023 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <linux/unistd.h>
18 #include <sched.h>
19 #include <semaphore.h>
20
21 #include <cstring> // strerror
22
23 #include "berberis/base/checks.h"
24 #include "berberis/base/tracing.h"
25 #include "berberis/guest_os_primitives/guest_signal.h"
26 #include "berberis/guest_os_primitives/guest_thread.h"
27 #include "berberis/guest_os_primitives/guest_thread_manager.h" // ResetCurrentGuestThreadAfterFork
28 #include "berberis/guest_os_primitives/scoped_pending_signals.h"
29 #include "berberis/guest_state/guest_addr.h"
30 #include "berberis/guest_state/guest_state_opaque.h"
31 #include "berberis/runtime/execute_guest.h"
32 #include "berberis/runtime_primitives/runtime_library.h"
33
34 #include "guest_signal_action.h"
35 #include "guest_thread_manager_impl.h"
36 #include "scoped_signal_blocker.h"
37
38 namespace berberis {
39
40 namespace {
41
// Issues the raw clone syscall with the arguments in the order the host kernel expects.
//
// Callers always pass (flags, child_stack, parent_tid, new_tls, child_tid); on x86-64 the kernel
// takes the last two in the opposite order, so they are swapped here before the call.
// Returns whatever syscall() returns.
long CloneSyscall(long flags, long child_stack, long parent_tid, long new_tls, long child_tid) {
#if defined(__x86_64__)  // sys_clone's last two arguments are flipped on x86-64.
  const long fourth_arg = child_tid;
  const long fifth_arg = new_tls;
#else
  const long fourth_arg = new_tls;
  const long fifth_arg = child_tid;
#endif
  return syscall(__NR_clone, flags, child_stack, parent_tid, fourth_arg, fifth_arg);
}
49
50 struct GuestThreadCloneInfo {
51 GuestThread* thread;
52 HostSigset mask;
53 sem_t sem;
54 };
55
SemPostOrDie(sem_t * sem)56 void SemPostOrDie(sem_t* sem) {
57 int error = sem_post(sem);
58 // sem_post works in two stages: it increments semaphore's value, and then calls FUTEX_WAKE.
59 // If FUTEX_WAIT sporadically returns inside sem_wait between sem_post stages then sem_wait
60 // may observe the updated value and successfully finish. If semaphore is destroyed upon
61 // sem_wait return (like in CloneGuestThread), sem_post's call to FUTEX_WAKE will fail with
62 // EINVAL.
63 // Note that sem_destroy itself may do nothing (bionic and glibc are like that), the actual
64 // destruction happens because we free up memory (e.g. stack frame) where sem_t is stored.
65 // More details at https://sourceware.org/bugzilla/show_bug.cgi?id=12674
66 #if defined(__GLIBC__) && ((__GLIBC__ < 2) || ((__GLIBC__ == 2) && (__GLIBC_MINOR__ < 21)))
67 // GLibc before 2.21 may return EINVAL in the above situation. We ignore it since we cannot do
68 // anything about it, and it doesn't really break anything: we just acknowledge the fact that the
69 // semaphore can be destoyed already.
70 LOG_ALWAYS_FATAL_IF(error != 0 && error != EINVAL, "sem_post returned error=%s", strerror(errno));
71 #else
72 // Bionic and recent GLibc ignore the error code returned
73 // from FUTEX_WAKE. So, they never return EINVAL.
74 LOG_ALWAYS_FATAL_IF(error != 0, "sem_post returned error=%s", strerror(errno));
75 #endif
76 }
77
RunClonedGuestThread(void * arg)78 int RunClonedGuestThread(void* arg) {
79 GuestThreadCloneInfo* info = static_cast<GuestThreadCloneInfo*>(arg);
80 GuestThread* thread = info->thread;
81
82 // Cannot use host pthread_key!
83 // TODO(b/280551726): Clear guest thread in exit syscall.
84 InsertCurrentThread(thread, false);
85
86 // ExecuteGuest requires pending signals enabled.
87 ScopedPendingSignalsEnabler scoped_pending_signals_enabler(thread);
88
89 // Host signals are blocked in parent before the clone,
90 // and remain blocked in child until this point.
91 RTSigprocmaskSyscallOrDie(SIG_SETMASK, &info->mask, nullptr);
92
93 // Notify parent that child is ready. Now parent can:
94 // - search for child in thread table
95 // - send child a signal
96 // - dispose info
97 SemPostOrDie(&info->sem);
98 // TODO(b/77574158): Ensure caller has a chance to handle the notification.
99 sched_yield();
100
101 ExecuteGuest(thread->state());
102
103 LOG_ALWAYS_FATAL("cloned thread didn't exit");
104 return 0;
105 }
106
107 } // namespace
108
109 // go/berberis-guest-threads
CloneGuestThread(GuestThread * thread,int flags,GuestAddr guest_stack_top,GuestAddr parent_tid,GuestAddr new_tls,GuestAddr child_tid)110 pid_t CloneGuestThread(GuestThread* thread,
111 int flags,
112 GuestAddr guest_stack_top,
113 GuestAddr parent_tid,
114 GuestAddr new_tls,
115 GuestAddr child_tid) {
116 ThreadState& thread_state = *thread->state();
117 if (!(flags & CLONE_VM)) {
118 // Memory is *not* shared with the child.
119 // Run the child on the same host stack as the parent. Thus, can use host local variables.
120 // The child gets a copy of guest thread object.
121 // ATTENTION: Do not set new tls for the host - tls might be incompatible.
122 // TODO(b/280551726): Consider forcing new host tls to 0.
123 long pid = CloneSyscall(flags & ~CLONE_SETTLS, 0, parent_tid, 0, child_tid);
124 if (pid == 0) {
125 // Child, reset thread table.
126 ResetCurrentGuestThreadAfterFork(thread);
127 if (guest_stack_top) {
128 SetStackRegister(GetCPUState(thread_state), guest_stack_top);
129 // TODO(b/280551726): Reset stack attributes?
130 }
131 if ((flags & CLONE_SETTLS)) {
132 SetTlsAddr(thread_state, new_tls);
133 }
134 }
135 return pid;
136 }
137
138 // Memory is shared with the child.
139 // The child needs a distinct stack, both host and guest! Because of the distinct host stack,
140 // cannot use host local variables. For now, use clone function to pass parameters to the child.
141 // The child needs new instance of guest thread object.
142
143 GuestThreadCloneInfo info;
144
145 info.thread = GuestThread::CreateClone(thread, (flags & CLONE_SIGHAND) != 0);
146 if (info.thread == nullptr) {
147 return EAGAIN;
148 }
149
150 ThreadState& clone_thread_state = *info.thread->state();
151
152 if ((flags & CLONE_SETTLS)) {
153 SetTlsAddr(clone_thread_state, new_tls);
154 }
155
156 // Current insn addr is on SVC instruction, move to the next.
157 // TODO(b/280551726): Not needed if we can use raw syscall and continue current execution.
158 CPUState& clone_cpu = GetCPUState(clone_thread_state);
159 AdvanceInsnAddrBeyondSyscall(clone_cpu);
160 SetReturnValueRegister(clone_cpu, 0); // Syscall return value
161
162 if (guest_stack_top != kNullGuestAddr) {
163 SetStackRegister(GetCPUState(clone_thread_state), guest_stack_top);
164 SetLinkRegister(clone_cpu, kNullGuestAddr);
165 } else {
166 if (!(flags & CLONE_VFORK)) {
167 TRACE("CLONE_VM with NULL guest stack and not in CLONE_VFORK mode, returning EINVAL");
168 return EINVAL;
169 }
170 // See b/323981318 and b/156400255.
171 TRACE("CLONE_VFORK with CLONE_VM and NULL guest stack, will share guest stack with parent");
172 // GuestThread::CreateClone has already copied stack and link pointers to new thread.
173 }
174
175 // Thread must start with pending signals while it's executing runtime code.
176 SetPendingSignalsStatusAtomic(clone_thread_state, kPendingSignalsEnabled);
177 SetResidence(clone_thread_state, kOutsideGeneratedCode);
178
179 int error = sem_init(&info.sem, 0, 0);
180 LOG_ALWAYS_FATAL_IF(error != 0, "sem_init returned error=%s", strerror(errno));
181
182 // ATTENTION: Don't set new tls for the host - tls might be incompatible.
183 // TODO(b/280551726): Consider forcing new host tls to 0.
184 long pid;
185 {
186 ScopedSignalBlocker signal_blocker;
187 info.mask = *signal_blocker.old_mask();
188 pid = clone(RunClonedGuestThread,
189 info.thread->GetHostStackTop(),
190 flags & ~CLONE_SETTLS,
191 &info,
192 parent_tid,
193 nullptr,
194 child_tid);
195 if (pid != -1) {
196 CHECK_EQ(0, sem_wait(&info.sem)); // Wait with blocked signals to avoid EINTR.
197 }
198 }
199
200 if (pid == -1) {
201 GuestThread::Destroy(info.thread);
202 }
203
204 sem_destroy(&info.sem);
205 return pid;
206 }
207
208 } // namespace berberis
209