1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <linux/unistd.h>
18 #include <sched.h>
19 #include <semaphore.h>
20 
21 #include <cstring>  // strerror
22 
23 #include "berberis/base/checks.h"
24 #include "berberis/base/tracing.h"
25 #include "berberis/guest_os_primitives/guest_signal.h"
26 #include "berberis/guest_os_primitives/guest_thread.h"
27 #include "berberis/guest_os_primitives/guest_thread_manager.h"  // ResetCurrentGuestThreadAfterFork
28 #include "berberis/guest_os_primitives/scoped_pending_signals.h"
29 #include "berberis/guest_state/guest_addr.h"
30 #include "berberis/guest_state/guest_state_opaque.h"
31 #include "berberis/runtime/execute_guest.h"
32 #include "berberis/runtime_primitives/runtime_library.h"
33 
34 #include "guest_signal_action.h"
35 #include "guest_thread_manager_impl.h"
36 #include "scoped_signal_blocker.h"
37 
38 namespace berberis {
39 
40 namespace {
41 
// Issues a raw clone syscall, hiding the per-architecture argument order.
//
// Callers always pass (flags, child_stack, parent_tid, new_tls, child_tid); the result of
// syscall() is returned unchanged.
long CloneSyscall(long flags, long child_stack, long parent_tid, long new_tls, long child_tid) {
  // sys_clone takes (..., new_tls, child_tid) as its last two arguments everywhere except
  // x86-64, where they come in the opposite order.
#if defined(__x86_64__)
  const long fourth_arg = child_tid;
  const long fifth_arg = new_tls;
#else
  const long fourth_arg = new_tls;
  const long fifth_arg = child_tid;
#endif
  return syscall(__NR_clone, flags, child_stack, parent_tid, fourth_arg, fifth_arg);
}
49 
50 struct GuestThreadCloneInfo {
51   GuestThread* thread;
52   HostSigset mask;
53   sem_t sem;
54 };
55 
SemPostOrDie(sem_t * sem)56 void SemPostOrDie(sem_t* sem) {
57   int error = sem_post(sem);
58   // sem_post works in two stages: it increments semaphore's value, and then calls FUTEX_WAKE.
59   // If FUTEX_WAIT sporadically returns inside sem_wait between sem_post stages then sem_wait
60   // may observe the updated value and successfully finish. If semaphore is destroyed upon
61   // sem_wait return (like in CloneGuestThread), sem_post's call to FUTEX_WAKE will fail with
62   // EINVAL.
63   // Note that sem_destroy itself may do nothing (bionic and glibc are like that), the actual
64   // destruction happens because we free up memory (e.g. stack frame) where sem_t is stored.
65   // More details at https://sourceware.org/bugzilla/show_bug.cgi?id=12674
66 #if defined(__GLIBC__) && ((__GLIBC__ < 2) || ((__GLIBC__ == 2) && (__GLIBC_MINOR__ < 21)))
67   // GLibc before 2.21 may return EINVAL in the above situation. We ignore it since we cannot do
68   // anything about it, and it doesn't really break anything: we just acknowledge the fact that the
69   // semaphore can be destoyed already.
70   LOG_ALWAYS_FATAL_IF(error != 0 && error != EINVAL, "sem_post returned error=%s", strerror(errno));
71 #else
72   // Bionic and recent GLibc ignore the error code returned
73   // from FUTEX_WAKE. So, they never return EINVAL.
74   LOG_ALWAYS_FATAL_IF(error != 0, "sem_post returned error=%s", strerror(errno));
75 #endif
76 }
77 
RunClonedGuestThread(void * arg)78 int RunClonedGuestThread(void* arg) {
79   GuestThreadCloneInfo* info = static_cast<GuestThreadCloneInfo*>(arg);
80   GuestThread* thread = info->thread;
81 
82   // Cannot use host pthread_key!
83   // TODO(b/280551726): Clear guest thread in exit syscall.
84   InsertCurrentThread(thread, false);
85 
86   // ExecuteGuest requires pending signals enabled.
87   ScopedPendingSignalsEnabler scoped_pending_signals_enabler(thread);
88 
89   // Host signals are blocked in parent before the clone,
90   // and remain blocked in child until this point.
91   RTSigprocmaskSyscallOrDie(SIG_SETMASK, &info->mask, nullptr);
92 
93   // Notify parent that child is ready. Now parent can:
94   // - search for child in thread table
95   // - send child a signal
96   // - dispose info
97   SemPostOrDie(&info->sem);
98   // TODO(b/77574158): Ensure caller has a chance to handle the notification.
99   sched_yield();
100 
101   ExecuteGuest(thread->state());
102 
103   LOG_ALWAYS_FATAL("cloned thread didn't exit");
104   return 0;
105 }
106 
107 }  // namespace
108 
109 // go/berberis-guest-threads
CloneGuestThread(GuestThread * thread,int flags,GuestAddr guest_stack_top,GuestAddr parent_tid,GuestAddr new_tls,GuestAddr child_tid)110 pid_t CloneGuestThread(GuestThread* thread,
111                        int flags,
112                        GuestAddr guest_stack_top,
113                        GuestAddr parent_tid,
114                        GuestAddr new_tls,
115                        GuestAddr child_tid) {
116   ThreadState& thread_state = *thread->state();
117   if (!(flags & CLONE_VM)) {
118     // Memory is *not* shared with the child.
119     // Run the child on the same host stack as the parent. Thus, can use host local variables.
120     // The child gets a copy of guest thread object.
121     // ATTENTION: Do not set new tls for the host - tls might be incompatible.
122     // TODO(b/280551726): Consider forcing new host tls to 0.
123     long pid = CloneSyscall(flags & ~CLONE_SETTLS, 0, parent_tid, 0, child_tid);
124     if (pid == 0) {
125       // Child, reset thread table.
126       ResetCurrentGuestThreadAfterFork(thread);
127       if (guest_stack_top) {
128         SetStackRegister(GetCPUState(thread_state), guest_stack_top);
129         // TODO(b/280551726): Reset stack attributes?
130       }
131       if ((flags & CLONE_SETTLS)) {
132         SetTlsAddr(thread_state, new_tls);
133       }
134     }
135     return pid;
136   }
137 
138   // Memory is shared with the child.
139   // The child needs a distinct stack, both host and guest! Because of the distinct host stack,
140   // cannot use host local variables. For now, use clone function to pass parameters to the child.
141   // The child needs new instance of guest thread object.
142 
143   GuestThreadCloneInfo info;
144 
145   info.thread = GuestThread::CreateClone(thread, (flags & CLONE_SIGHAND) != 0);
146   if (info.thread == nullptr) {
147     return EAGAIN;
148   }
149 
150   ThreadState& clone_thread_state = *info.thread->state();
151 
152   if ((flags & CLONE_SETTLS)) {
153     SetTlsAddr(clone_thread_state, new_tls);
154   }
155 
156   // Current insn addr is on SVC instruction, move to the next.
157   // TODO(b/280551726): Not needed if we can use raw syscall and continue current execution.
158   CPUState& clone_cpu = GetCPUState(clone_thread_state);
159   AdvanceInsnAddrBeyondSyscall(clone_cpu);
160   SetReturnValueRegister(clone_cpu, 0);  // Syscall return value
161 
162   if (guest_stack_top != kNullGuestAddr) {
163     SetStackRegister(GetCPUState(clone_thread_state), guest_stack_top);
164     SetLinkRegister(clone_cpu, kNullGuestAddr);
165   } else {
166     if (!(flags & CLONE_VFORK)) {
167       TRACE("CLONE_VM with NULL guest stack and not in CLONE_VFORK mode, returning EINVAL");
168       return EINVAL;
169     }
170     // See b/323981318 and b/156400255.
171     TRACE("CLONE_VFORK with CLONE_VM and NULL guest stack, will share guest stack with parent");
172     // GuestThread::CreateClone has already copied stack and link pointers to new thread.
173   }
174 
175   // Thread must start with pending signals while it's executing runtime code.
176   SetPendingSignalsStatusAtomic(clone_thread_state, kPendingSignalsEnabled);
177   SetResidence(clone_thread_state, kOutsideGeneratedCode);
178 
179   int error = sem_init(&info.sem, 0, 0);
180   LOG_ALWAYS_FATAL_IF(error != 0, "sem_init returned error=%s", strerror(errno));
181 
182   // ATTENTION: Don't set new tls for the host - tls might be incompatible.
183   // TODO(b/280551726): Consider forcing new host tls to 0.
184   long pid;
185   {
186     ScopedSignalBlocker signal_blocker;
187     info.mask = *signal_blocker.old_mask();
188     pid = clone(RunClonedGuestThread,
189                 info.thread->GetHostStackTop(),
190                 flags & ~CLONE_SETTLS,
191                 &info,
192                 parent_tid,
193                 nullptr,
194                 child_tid);
195     if (pid != -1) {
196       CHECK_EQ(0, sem_wait(&info.sem));  // Wait with blocked signals to avoid EINTR.
197     }
198   }
199 
200   if (pid == -1) {
201     GuestThread::Destroy(info.thread);
202   }
203 
204   sem_destroy(&info.sem);
205   return pid;
206 }
207 
208 }  // namespace berberis
209