/*
 * Copyright (C) 2008 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#pragma once

#include <pthread.h>
#include <stdatomic.h>

#if __has_feature(hwaddress_sanitizer)
#include <sanitizer/hwasan_interface.h>
#else
#define __hwasan_thread_enter()
#define __hwasan_thread_exit()
#endif

#include "platform/bionic/page.h"

#include "private/bionic_elf_tls.h"
#include "private/bionic_lock.h"
#include "private/bionic_tls.h"

// Has the thread been detached by a pthread_join or pthread_detach call?
#define PTHREAD_ATTR_FLAG_DETACHED 0x00000001

// Has the thread been joined by another thread?
#define PTHREAD_ATTR_FLAG_JOINED 0x00000002

// Used for pthread_attr_setinheritsched. We need two flags for this apparent
// boolean because our historical behavior matches neither of the POSIX choices.
#define PTHREAD_ATTR_FLAG_INHERIT 0x00000004
#define PTHREAD_ATTR_FLAG_EXPLICIT 0x00000008

enum ThreadJoinState {
  THREAD_NOT_JOINED,
  THREAD_EXITED_NOT_JOINED,
  THREAD_JOINED,
  THREAD_DETACHED
};
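
// An informal sketch of the join_state transitions (pthread_join(),
// pthread_detach(), and thread exit race via atomic compare-and-exchange on
// pthread_internal_t::join_state below):
//
//   THREAD_NOT_JOINED --------- pthread_detach() --> THREAD_DETACHED
//   THREAD_NOT_JOINED --------- pthread_join() ----> THREAD_JOINED
//   THREAD_NOT_JOINED --------- thread exits ------> THREAD_EXITED_NOT_JOINED
//   THREAD_EXITED_NOT_JOINED -- pthread_join() ----> THREAD_JOINED
//
// A detached or joined thread can be cleaned up as soon as it exits.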

class thread_local_dtor;

class pthread_internal_t {
 public:
  class pthread_internal_t* next;
  class pthread_internal_t* prev;

  pid_t tid;

 private:
  uint32_t cached_pid_ : 31;
  uint32_t vforked_ : 1;

 public:
  bool is_vforked() { return vforked_; }

  pid_t invalidate_cached_pid() {
    pid_t old_value;
    get_cached_pid(&old_value);
    set_cached_pid(0);
    return old_value;
  }

  void set_cached_pid(pid_t value) {
    cached_pid_ = value;
  }

  bool get_cached_pid(pid_t* cached_pid) {
    *cached_pid = cached_pid_;
    return (*cached_pid != 0);
  }
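
  // Illustrative only (a sketch, not the exact getpid() implementation):
  // callers can use the cache to avoid a getpid syscall, falling back to the
  // kernel when the cache has been invalidated (e.g. around fork()):
  //
  //   pid_t cached_pid;
  //   if (self->get_cached_pid(&cached_pid)) return cached_pid;
  //   return syscall(__NR_getpid);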

  pthread_attr_t attr;

  _Atomic(ThreadJoinState) join_state;

  __pthread_cleanup_t* cleanup_stack;

  void* (*start_routine)(void*);
  void* start_routine_arg;
  void* return_value;
  sigset64_t start_mask;

  void* alternate_signal_stack;

  // The start address of the shadow call stack's guard region (arm64/riscv64).
  // This region is SCS_GUARD_REGION_SIZE bytes large, but only SCS_SIZE bytes
  // are actually used.
  //
  // This address is only used to deallocate the shadow call stack on thread
  // exit; the address of the stack itself is stored only in the register used
  // as the shadow stack pointer (x18 on arm64, gp on riscv64).
  //
  // Because the protection offered by SCS relies on the secrecy of the stack
  // address, storing the address here weakens the protection, but only
  // slightly: it is relatively easy for an attacker to discover the address
  // of the guard region anyway (e.g. by reference to other allocations), but
  // not the stack itself, which occupies <0.1% of the guard region.
  //
  // longjmp()/setjmp() don't store all the bits of the shadow stack pointer,
  // only the bottom bits covered by SCS_MASK. Since longjmp()/setjmp() between
  // different threads is undefined behavior (and unsupported on Android), we
  // can retrieve the high bits of the shadow stack pointer from the current
  // value in the register --- all the jmp_buf needs to store is where exactly
  // the shadow stack pointer is *within* the thread's shadow stack: the bottom
  // bits of the register.
  //
  // There are at least two other options for discovering the start address of
  // the guard region on thread exit, but they are not as simple as storing it
  // in TLS:
  //
  // 1) Derive it from the current value of the shadow stack pointer. This is
  //    only possible in processes that do not contain legacy code that might
  //    clobber x18 on arm64, so each process would have to declare early
  //    during startup whether it might load legacy code.
  //    TODO: riscv64 has no legacy code, so we can actually go this route
  //    there, but hopefully we'll actually get the Zisslpcfi extension instead.
  // 2) Mark the guard region as such using prctl(PR_SET_VMA_ANON_NAME) and
  //    discover its address by reading /proc/self/maps. One issue with this is
  //    that reading /proc/self/maps can race with allocations, so we may need
  //    code to handle retries.
  void* shadow_call_stack_guard_region;
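
  // Illustrative only (scs_register_value() is a stand-in for reading the
  // shadow stack register directly): longjmp() can rebuild the full shadow
  // stack pointer from the bottom bits that setjmp() saved:
  //
  //   uintptr_t scsp = (scs_register_value() & ~SCS_MASK) |
  //                    (saved_bits & SCS_MASK);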

  // A pointer to the top of the stack. This lets android_unsafe_frame_pointer_chase determine the
  // top of the stack quickly, which would otherwise require special logic for the main thread.
  uintptr_t stack_top;

  // Whether the thread is in the process of terminating (has blocked signals), or has already
  // terminated. This is used by android_run_on_all_threads() to avoid sending a signal to a thread
  // that will never receive it.
  _Atomic(bool) terminating;

  Lock startup_handshake_lock;

  void* mmap_base;
  size_t mmap_size;

  // The location of the VMA to label as the thread's stack_and_tls.
  void* mmap_base_unguarded;
  size_t mmap_size_unguarded;
  char vma_name_buffer[32];

  thread_local_dtor* thread_local_dtors;

  /*
   * The dynamic linker implements dlerror(3), which makes it hard for us to implement this
   * per-thread buffer by simply using malloc(3) and free(3).
   */
  char* current_dlerror;
#define __BIONIC_DLERROR_BUFFER_SIZE 512
  char dlerror_buffer[__BIONIC_DLERROR_BUFFER_SIZE];

  bionic_tls* bionic_tls;

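  // __errno() returns a pointer to this field; bionic's errno macro expands
  // to *__errno().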
  int errno_value;

  bionic_tcb* bionic_tcb;
  char stack_mte_ringbuffer_vma_name_buffer[32];
  bool should_allocate_stack_mte_ringbuffer;

  bool is_main() { return start_routine == nullptr; }
};

struct ThreadMapping {
  char* mmap_base;
  size_t mmap_size;
  char* mmap_base_unguarded;
  size_t mmap_size_unguarded;

  char* static_tls;
  char* stack_base;
  char* stack_top;
};
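
// An informal sketch of the thread's primary mapping as created by
// __allocate_thread_mapping() (low addresses first), assuming the usual
// bionic layout of "stack guard, stack, static TLS, guard page":
//
//   [ stack guard | stack (grows down from stack_top) | static TLS | guard ]
//
// mmap_base_unguarded/mmap_size_unguarded describe the region without the
// guards, which is what gets labeled as the thread's stack_and_tls VMA.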

__LIBC_HIDDEN__ void __init_tcb(bionic_tcb* tcb, pthread_internal_t* thread);
__LIBC_HIDDEN__ void __init_tcb_stack_guard(bionic_tcb* tcb);
__LIBC_HIDDEN__ void __init_tcb_dtv(bionic_tcb* tcb);
__LIBC_HIDDEN__ void __init_bionic_tls_ptrs(bionic_tcb* tcb, bionic_tls* tls);
__LIBC_HIDDEN__ bionic_tls* __allocate_temp_bionic_tls();
__LIBC_HIDDEN__ void __free_temp_bionic_tls(bionic_tls* tls);
__LIBC_HIDDEN__ void __init_additional_stacks(pthread_internal_t*);
__LIBC_HIDDEN__ int __init_thread(pthread_internal_t* thread);
__LIBC_HIDDEN__ ThreadMapping __allocate_thread_mapping(size_t stack_size, size_t stack_guard_size);
__LIBC_HIDDEN__ void __set_stack_and_tls_vma_name(bool is_main_thread);
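
// Illustrative only (a sketch, not the exact pthread_create() code): thread
// creation uses the helpers above roughly as follows:
//
//   ThreadMapping mapping = __allocate_thread_mapping(stack_size, guard_size);
//   // ...place the bionic_tcb, bionic_tls, and pthread_internal_t inside
//   // mapping.static_tls, then:
//   __init_tcb(tcb, thread);
//   __init_tcb_stack_guard(tcb);
//   __init_bionic_tls_ptrs(tcb, tls);
//   __init_thread(thread);  // join state, scheduler attributes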

__LIBC_HIDDEN__ pthread_t __pthread_internal_add(pthread_internal_t* thread);
__LIBC_HIDDEN__ pthread_internal_t* __pthread_internal_find(pthread_t pthread_id, const char* caller);
__LIBC_HIDDEN__ pid_t __pthread_internal_gettid(pthread_t pthread_id, const char* caller);
__LIBC_HIDDEN__ void __pthread_internal_remove(pthread_internal_t* thread);
__LIBC_HIDDEN__ void __pthread_internal_remove_and_free(pthread_internal_t* thread);
__LIBC_HIDDEN__ void __find_main_stack_limits(uintptr_t* low, uintptr_t* high);
#if defined(__aarch64__)
__LIBC_HIDDEN__ void* __allocate_stack_mte_ringbuffer(size_t n, pthread_internal_t* thread);
#endif

static inline __always_inline bionic_tcb* __get_bionic_tcb() {
  return reinterpret_cast<bionic_tcb*>(&__get_tls()[MIN_TLS_SLOT]);
}

// Make __get_thread() inlined for performance reasons. See http://b/19825434.
static inline __always_inline pthread_internal_t* __get_thread() {
  return static_cast<pthread_internal_t*>(__get_tls()[TLS_SLOT_THREAD_ID]);
}

static inline __always_inline bionic_tls& __get_bionic_tls() {
  return *static_cast<bionic_tls*>(__get_tls()[TLS_SLOT_BIONIC_TLS]);
}

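// The TLS_SLOT_DTV slot stores a pointer to the TlsDtv's generation field
// rather than to the TlsDtv itself, so these helpers convert between the two
// by adding or subtracting offsetof(TlsDtv, generation).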
static inline __always_inline TlsDtv* __get_tcb_dtv(bionic_tcb* tcb) {
  uintptr_t dtv_slot = reinterpret_cast<uintptr_t>(tcb->tls_slot(TLS_SLOT_DTV));
  return reinterpret_cast<TlsDtv*>(dtv_slot - offsetof(TlsDtv, generation));
}

static inline void __set_tcb_dtv(bionic_tcb* tcb, TlsDtv* val) {
  tcb->tls_slot(TLS_SLOT_DTV) = &val->generation;
}

extern "C" __LIBC_HIDDEN__ int __set_tls(void* ptr);

__LIBC_HIDDEN__ void pthread_key_clean_all(void);

// Address space is precious on LP32, so use the minimum unit: one page.
// On LP64, we could use more but there's no obvious advantage to doing
// so, and the various media processes use RLIMIT_AS as a way to limit
// the amount of allocation they'll do.
#define PTHREAD_GUARD_SIZE max_android_page_size()

// SIGSTKSZ (8KiB) is not big enough.
// An snprintf to a stack buffer of size PATH_MAX consumes ~7KiB of stack.
// On 64-bit, logging uses more than 8KiB by itself, ucontext is comically
// large on aarch64, and we have effectively infinite address space, so double
// the signal stack size.
#if defined(__LP64__)
#define SIGNAL_STACK_SIZE_WITHOUT_GUARD (32 * 1024)
#else
#define SIGNAL_STACK_SIZE_WITHOUT_GUARD (16 * 1024)
#endif

// Traditionally we gave threads a 1MiB stack. When we started
// allocating per-thread alternate signal stacks to ease debugging of
// stack overflows, we subtracted the same amount we were using there
// from the default thread stack size. This should keep memory usage
// roughly constant.
#define PTHREAD_STACK_SIZE_DEFAULT ((1 * 1024 * 1024) - SIGNAL_STACK_SIZE_WITHOUT_GUARD)

// Leave room for a guard page in the internally created signal stacks.
#define SIGNAL_STACK_SIZE (SIGNAL_STACK_SIZE_WITHOUT_GUARD + PTHREAD_GUARD_SIZE)
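//
// Worked example, assuming LP64 with a 4KiB PTHREAD_GUARD_SIZE:
// SIGNAL_STACK_SIZE_WITHOUT_GUARD is 32KiB, so SIGNAL_STACK_SIZE is 36KiB and
// PTHREAD_STACK_SIZE_DEFAULT is 1MiB - 32KiB = 992KiB.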

// Needed by fork.
__LIBC_HIDDEN__ extern void __bionic_atfork_run_prepare();
__LIBC_HIDDEN__ extern void __bionic_atfork_run_child();
__LIBC_HIDDEN__ extern void __bionic_atfork_run_parent();
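//
// Illustrative only (a sketch, not the exact fork() code): these bracket the
// underlying clone of the process, mirroring pthread_atfork() handlers:
//
//   __bionic_atfork_run_prepare();
//   pid_t pid = /* clone the process */;
//   if (pid == 0) __bionic_atfork_run_child();
//   else __bionic_atfork_run_parent();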

// Re-map the stacks of all current threads, and of subsequently launched
// threads, with PROT_MTE. Returns 'true' if remapping took place, 'false' on
// error or if the stacks were already remapped in the past.
__LIBC_HIDDEN__ bool __pthread_internal_remap_stack_with_mte();

extern "C" bool android_run_on_all_threads(bool (*func)(void*), void* arg);

extern pthread_rwlock_t g_thread_creation_lock;