// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef BASE_ALLOCATOR_DISPATCHER_TLS_H_
#define BASE_ALLOCATOR_DISPATCHER_TLS_H_

#include <string_view>

#include "build/build_config.h"

#if BUILDFLAG(IS_POSIX)  // the current allocation mechanism (mmap) and TLS
                         // support (pthread) are both defined by POSIX
#define USE_LOCAL_TLS_EMULATION() true
#else
#define USE_LOCAL_TLS_EMULATION() false
#endif

#if USE_LOCAL_TLS_EMULATION()
#include <algorithm>
#include <atomic>
#include <memory>
#include <mutex>

#include "base/base_export.h"
#include "base/check.h"
#include "base/compiler_specific.h"
#include "partition_alloc/partition_alloc_constants.h"

#include <pthread.h>

#if HAS_FEATURE(thread_sanitizer)
#define DISABLE_TSAN_INSTRUMENTATION __attribute__((no_sanitize("thread")))
#else
#define DISABLE_TSAN_INSTRUMENTATION
#endif

#define STR_HELPER(x) #x
#define STR(x) STR_HELPER(x)
// Verify that a condition holds and terminate the process in case it doesn't.
// The functionality is similar to RAW_CHECK but includes more information in
// the logged messages. It is non-allocating to prevent recursion.
#define TLS_RAW_CHECK(error_message, condition) \
  TLS_RAW_CHECK_IMPL(error_message, condition, __FILE__, __LINE__)

#define TLS_RAW_CHECK_IMPL(error_message, condition, file, line)        \
  do {                                                                  \
    if (!(condition)) {                                                 \
      constexpr const char* message =                                   \
          "TLS System: " error_message " Failed condition '" #condition \
          "' in (" file "@" STR(line) ").\n";                           \
      ::logging::RawCheckFailure(message);                              \
    }                                                                   \
  } while (0)
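
// A hypothetical usage sketch (the pthread call is illustrative only):
//
//   pthread_key_t key;
//   TLS_RAW_CHECK("Failed to create key.",
//                 pthread_key_create(&key, nullptr) == 0);
//
// On failure this logs "TLS System: Failed to create key. Failed condition
// '...' in (file@line)." and terminates the process without allocating.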

namespace base::debug {
struct CrashKeyString;
}

namespace base::allocator::dispatcher {
namespace internal {

// Allocate memory using POSIX's mmap and munmap functionality. The allocator
// implements the allocator interface required by ThreadLocalStorage.
struct BASE_EXPORT MMapAllocator {
// The minimum size of a memory chunk when allocating. Even for chunks with
// fewer bytes, at least AllocationChunkSize bytes are allocated. For mmap,
// this is usually the page size of the system.
// For various OS-CPU combinations, partition_alloc::PartitionPageSize() is not
// constexpr. Hence, we cannot use this value and instead define it locally.
#if defined(PAGE_ALLOCATOR_CONSTANTS_ARE_CONSTEXPR) && \
    PAGE_ALLOCATOR_CONSTANTS_ARE_CONSTEXPR
  constexpr static size_t AllocationChunkSize =
      partition_alloc::PartitionPageSize();
#elif BUILDFLAG(IS_APPLE)
  constexpr static size_t AllocationChunkSize = 16384;
#elif BUILDFLAG(IS_ANDROID) && defined(ARCH_CPU_64_BITS)
  constexpr static size_t AllocationChunkSize = 16384;
#elif BUILDFLAG(IS_LINUX) && defined(ARCH_CPU_ARM64)
  constexpr static size_t AllocationChunkSize = 16384;
#else
  constexpr static size_t AllocationChunkSize = 4096;
#endif

  // Allocate size_in_bytes bytes of raw memory. Return nullptr if allocation
  // fails.
  void* AllocateMemory(size_t size_in_bytes);
  // Free the raw memory pointed to by pointer_to_allocated. Returns a boolean
  // value indicating if the free was successful.
  bool FreeMemoryForTesting(void* pointer_to_allocated, size_t size_in_bytes);
};
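
// A minimal usage sketch (illustrative; per the contract above, AllocateMemory
// returns nullptr on failure and FreeMemoryForTesting reports success):
//
//   MMapAllocator allocator;
//   void* memory =
//       allocator.AllocateMemory(MMapAllocator::AllocationChunkSize);
//   if (memory != nullptr) {
//     // ... place objects into |memory| ...
//     allocator.FreeMemoryForTesting(memory,
//                                    MMapAllocator::AllocationChunkSize);
//   }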

// The allocator used by default for the thread local storage.
using DefaultAllocator = MMapAllocator;

using OnThreadTerminationFunction = void (*)(void*);

// The TLS system used by default for the thread local storage. It stores and
// retrieves thread-specific data pointers.
class BASE_EXPORT PThreadTLSSystem {
 public:
  PThreadTLSSystem();

  PThreadTLSSystem(const PThreadTLSSystem&) = delete;
  PThreadTLSSystem(PThreadTLSSystem&&);
  PThreadTLSSystem& operator=(const PThreadTLSSystem&) = delete;
  PThreadTLSSystem& operator=(PThreadTLSSystem&&);

  // Initialize the TLS system to store a data set for different threads.
  // @param thread_termination_function An optional function which will be
  // invoked upon termination of a thread.
  bool Setup(OnThreadTerminationFunction thread_termination_function,
             const std::string_view instance_id);
  // Tear down the TLS system. After completing tear down, the thread
  // termination function passed to Setup will not be invoked anymore.
  bool TearDownForTesting();

  // Get the pointer to the data associated with the current thread. Returns
  // nullptr if the TLS system is not initialized or no data was set before.
  void* GetThreadSpecificData();
  // Set the pointer to the data associated with the current thread. Return
  // true if stored successfully, false otherwise.
  bool SetThreadSpecificData(void* data);

 private:
  base::debug::CrashKeyString* crash_key_ = nullptr;
  pthread_key_t data_access_key_ = 0;
#if DCHECK_IS_ON()
  // From the POSIX standard at https://www.open-std.org/jtc1/sc22/open/n4217.pdf:
  // The effect of calling pthread_getspecific() or pthread_setspecific() with
  // a key value not obtained from pthread_key_create() or after key has been
  // deleted with pthread_key_delete() is undefined.
  //
  // Unfortunately, POSIX doesn't define a special value of pthread_key_t
  // indicating an invalid key which would allow us to detect accesses outside
  // of the initialized state. Hence, to prevent us from drifting into the evil
  // realm of undefined behaviour, we store whether we're somewhere between
  // Setup and Teardown.
  std::atomic_bool initialized_{false};
#endif
};
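
// A hypothetical call sequence (identifiers are illustrative; the termination
// function is optional and may be null):
//
//   PThreadTLSSystem tls_system;
//   CHECK(tls_system.Setup(nullptr, "my_instance"));
//   static int thread_data = 42;
//   CHECK(tls_system.SetThreadSpecificData(&thread_data));
//   void* data = tls_system.GetThreadSpecificData();  // == &thread_data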

using DefaultTLSSystem = PThreadTLSSystem;

// In some scenarios, most notably when testing, the allocator and TLS system
// passed to |ThreadLocalStorage| are not copyable and have to be wrapped, e.g.
// using std::reference_wrapper. |dereference| is a small helper to retrieve
// the underlying value.
template <typename T>
T& dereference(T& ref) {
  return ref;
}

template <typename T>
T& dereference(std::reference_wrapper<T>& ref) {
  // std::reference_wrapper requires a valid reference for construction;
  // therefore, there is no need to check here.
  return ref.get();
}
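
// For example, both overloads yield a reference to the same underlying object
// (a sketch; |allocator| is illustrative):
//
//   MMapAllocator allocator;
//   MMapAllocator& direct = dereference(allocator);
//   std::reference_wrapper<MMapAllocator> wrapped(allocator);
//   MMapAllocator& unwrapped = dereference(wrapped);  // Same object.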

// Store thread local data. The data is organized in chunks, where each chunk
// holds |ItemsPerChunk| items. Each item may be free or used.
//
// When a thread requests data, the chunks are searched for a free data item,
// which is registered for this thread and marked as |used|. Further requests
// by this thread will then always return the same item. When a thread
// terminates, the item will be reset and returned to the pool of free items.
//
// Upon construction, the first chunk is created. If a thread requests data and
// there is no free item available, another chunk is created. Upon destruction,
// all memory is freed. Pointers to data items become invalid!
//
// Constructor and destructor are not thread safe.
//
// @tparam PayloadType The item type to be stored.
// @tparam AllocatorType The allocator being used. An allocator must provide
// the following interface:
//  // Allocate size_in_bytes bytes of raw memory:
//  void* AllocateMemory(size_t size_in_bytes);
//  // Free the raw memory pointed to by pointer_to_allocated:
//  bool FreeMemoryForTesting(void* pointer_to_allocated, size_t size_in_bytes);
// Any failure in allocation or free must terminate the process.
// @tparam TLSSystemType The TLS system being used. A TLS system must provide
// the following interface:
//  bool Setup(OnThreadTerminationFunction thread_termination_function,
//             std::string_view instance_id);
//  bool TearDownForTesting();
//  void* GetThreadSpecificData();
//  bool SetThreadSpecificData(void* data);
// @tparam AllocationChunkSize The minimum size of a memory chunk that the
// allocator can handle. We try to size the chunks so that each chunk uses this
// size to the maximum extent possible.
// @tparam IsDestructibleForTesting For testing purposes we allow the
// destructor to perform clean up upon destruction. Otherwise, using the
// destructor will result in a compilation failure.
template <typename PayloadType,
          typename AllocatorType,
          typename TLSSystemType,
          size_t AllocationChunkSize,
          bool IsDestructibleForTesting>
struct ThreadLocalStorage {
  explicit ThreadLocalStorage(const std::string_view instance_id)
      : root_(AllocateAndInitializeChunk()) {
    Initialize(instance_id);
  }

  // Create a new instance of |ThreadLocalStorage| using the passed allocator
  // and TLS system. This initializes the underlying TLS system and creates the
  // first chunk of data.
  ThreadLocalStorage(const std::string_view instance_id,
                     AllocatorType allocator,
                     TLSSystemType tls_system)
      : allocator_(std::move(allocator)),
        tls_system_(std::move(tls_system)),
        root_(AllocateAndInitializeChunk()) {
    Initialize(instance_id);
  }

  // Delete an instance of |ThreadLocalStorage| and free all the data chunks
  // created.
  ~ThreadLocalStorage() {
    if constexpr (IsDestructibleForTesting) {
      TearDownForTesting();
    } else if constexpr (!IsDestructibleForTesting) {
      static_assert(
          IsDestructibleForTesting,
          "ThreadLocalStorage cannot be destructed outside of test code.");
    }
  }

  // Explicitly prevent all forms of copy/move construction/assignment. For an
  // exact copy of ThreadLocalStorage we would need to copy the mapping of
  // thread to item, which we can't do at the moment. On the other hand, our
  // atomic members do not support moving out of the box.
  ThreadLocalStorage(const ThreadLocalStorage&) = delete;
  ThreadLocalStorage(ThreadLocalStorage&& other) = delete;
  ThreadLocalStorage& operator=(const ThreadLocalStorage&) = delete;
  ThreadLocalStorage& operator=(ThreadLocalStorage&&) = delete;

  // Get the data item for the current thread. If no data is registered so far,
  // find a free item in the chunks and register it for the current thread.
  PayloadType* GetThreadLocalData() {
    auto& tls_system = dereference(tls_system_);

    auto* slot = static_cast<SingleSlot*>(tls_system.GetThreadSpecificData());

    if (UNLIKELY(slot == nullptr)) {
      slot = FindAndAllocateFreeSlot(root_.load(std::memory_order_relaxed));

      // We might be called in the course of handling a memory allocation. We
      // do not use CHECK since it might allocate and cause a recursion.
      TLS_RAW_CHECK("Failed to set thread specific data.",
                    tls_system.SetThreadSpecificData(slot));

      // Reset the content to wipe out any previous data.
      Reset(slot->item);
    }

    return &(slot->item);
  }

 private:
  // Encapsulate the payload item and some administrative data.
  struct SingleSlot {
    PayloadType item;
#if !defined(__cpp_lib_atomic_value_initialization) || \
    __cpp_lib_atomic_value_initialization < 201911L
    std::atomic_flag is_used = ATOMIC_FLAG_INIT;
#else
    std::atomic_flag is_used;
#endif
  };

  template <size_t NumberOfItems>
  struct ChunkT {
    SingleSlot slots[NumberOfItems];
    // Pointer to the next chunk.
    std::atomic<ChunkT*> next_chunk = nullptr;
    // Helper flag to ensure we create the next chunk only once in a multi
    // threaded environment.
    std::once_flag create_next_chunk_flag;
  };

  template <size_t LowerNumberOfItems,
            size_t UpperNumberOfItems,
            size_t NumberOfBytes>
  static constexpr size_t CalculateEffectiveNumberOfItemsBinSearch() {
    if constexpr (LowerNumberOfItems == UpperNumberOfItems) {
      return LowerNumberOfItems;
    }

    constexpr size_t CurrentNumberOfItems =
        (UpperNumberOfItems - LowerNumberOfItems) / 2 + LowerNumberOfItems;

    if constexpr (sizeof(ChunkT<CurrentNumberOfItems>) > NumberOfBytes) {
      return CalculateEffectiveNumberOfItemsBinSearch<
          LowerNumberOfItems, CurrentNumberOfItems, NumberOfBytes>();
    }

    if constexpr (sizeof(ChunkT<CurrentNumberOfItems + 1>) < NumberOfBytes) {
      return CalculateEffectiveNumberOfItemsBinSearch<
          CurrentNumberOfItems + 1, UpperNumberOfItems, NumberOfBytes>();
    }

    return CurrentNumberOfItems;
  }

  // Calculate the maximum number of items we can store in one chunk without
  // the size of the chunk exceeding NumberOfBytes. To keep alignment and
  // packing from tampering with the calculation, instead of computing the
  // correct number of items directly we use the sizeof operator on ChunkT to
  // search for the correct size. Unfortunately, the number of recursions is
  // limited by the compiler. Therefore, we use a binary search instead of a
  // simple linear search.
  template <size_t MinimumNumberOfItems, size_t NumberOfBytes>
  static constexpr size_t CalculateEffectiveNumberOfItems() {
    if constexpr (sizeof(ChunkT<MinimumNumberOfItems>) < NumberOfBytes) {
      constexpr size_t LowerNumberOfItems = MinimumNumberOfItems;
      constexpr size_t UpperNumberOfItems =
          NumberOfBytes / sizeof(PayloadType) + 1;
      return CalculateEffectiveNumberOfItemsBinSearch<
          LowerNumberOfItems, UpperNumberOfItems, NumberOfBytes>();
    }

    return MinimumNumberOfItems;
  }
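
  // As an illustrative example: for an 8-byte PayloadType and
  // NumberOfBytes = 4096, the search runs over the range [75, 513] and
  // converges on the largest N for which sizeof(ChunkT<N>) still fits into
  // 4096 bytes. The exact N depends on the alignment and padding of
  // SingleSlot, the next_chunk pointer, and the once_flag, which is why
  // sizeof is consulted instead of simply dividing NumberOfBytes by
  // sizeof(PayloadType).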

 public:
  // The minimum number of items per chunk. It should be high enough to
  // accommodate most items in the root chunk whilst not wasting too much space
  // on unnecessary items.
  static constexpr size_t MinimumNumberOfItemsPerChunk = 75;
  // The effective number of items per chunk. We use the AllocationChunkSize as
  // a hint to calculate the effective number of items so we occupy one of
  // these memory chunks to the maximum extent possible.
  static constexpr size_t ItemsPerChunk =
      CalculateEffectiveNumberOfItems<MinimumNumberOfItemsPerChunk,
                                      AllocationChunkSize>();

 private:
  using Chunk = ChunkT<ItemsPerChunk>;

  static_assert(ItemsPerChunk >= MinimumNumberOfItemsPerChunk);

  // Mark an item's slot ready for reuse. This function is used as thread
  // termination function in the TLS system. We do not destroy anything at
  // this point but simply mark the slot as unused.
  static void MarkSlotAsFree(void* data) {
    // We always store SingleSlots in the TLS system. Therefore, we cast to
    // SingleSlot and reset the is_used flag.
    auto* const slot = static_cast<SingleSlot*>(data);

    // We might be called in the course of handling a memory allocation.
    // Therefore, do not use CHECK since it might allocate and cause a
    // recursion.
    TLS_RAW_CHECK("Received an invalid slot.",
                  slot && slot->is_used.test_and_set());

    slot->is_used.clear(std::memory_order_relaxed);
  }

  // Perform common initialization during construction of an instance.
  void Initialize(const std::string_view instance_id) {
    // The constructor must be called outside of the allocation path.
    // Therefore, it is safe to verify with CHECK.

    // By passing MarkSlotAsFree as thread_termination_function we ensure that
    // the slot/item assigned to a finished thread is returned to the pool of
    // unused items.
    CHECK(dereference(tls_system_).Setup(&MarkSlotAsFree, instance_id));
  }

  Chunk* AllocateAndInitializeChunk() {
    void* const uninitialized_memory =
        dereference(allocator_).AllocateMemory(sizeof(Chunk));

    // We might be called in the course of handling a memory allocation. We do
    // not use CHECK since it might allocate and cause a recursion.
    TLS_RAW_CHECK("Failed to allocate memory for new chunk.",
                  uninitialized_memory != nullptr);

    return new (uninitialized_memory) Chunk{};
  }

  void FreeAndDeallocateChunkForTesting(Chunk* chunk_to_erase) {
    chunk_to_erase->~Chunk();

    // FreeAndDeallocateChunkForTesting must be called outside of the
    // allocation path. Therefore, it is safe to verify with CHECK.
    CHECK(dereference(allocator_)
              .FreeMemoryForTesting(chunk_to_erase, sizeof(Chunk)));
  }

  // Find a free slot in the passed chunk, reserve it and return it to the
  // caller. If no free slot can be found, head on to the next chunk. If the
  // next chunk doesn't exist, create it.
  SingleSlot* FindAndAllocateFreeSlot(Chunk* const chunk) {
    SingleSlot* const slot = std::find_if_not(
        std::begin(chunk->slots), std::end(chunk->slots),
        [](SingleSlot& candidate_slot) {
          return candidate_slot.is_used.test_and_set(std::memory_order_relaxed);
        });

    // So we found a slot. Happily return it to the caller.
    if (slot != std::end(chunk->slots)) {
      return slot;
    }

    // Ok, there are no more free slots in this chunk. First, ensure the next
    // chunk is valid and create one if necessary.
    std::call_once(chunk->create_next_chunk_flag, [&] {
      // From https://eel.is/c++draft/thread.once.callonce#3
      //
      // Synchronization: For any given once_flag: all active executions occur
      // in a total order; completion of an active execution synchronizes with
      // the start of the next one in this total order; and the returning
      // execution synchronizes with the return from all passive executions.
      //
      // Therefore, we only do a relaxed store here; call_once synchronizes
      // with other threads.
      chunk->next_chunk.store(AllocateAndInitializeChunk(),
                              std::memory_order_relaxed);
    });

    return FindAndAllocateFreeSlot(chunk->next_chunk);
  }

  template <bool IsDestructibleForTestingP = IsDestructibleForTesting>
  typename std::enable_if<IsDestructibleForTestingP>::type
  TearDownForTesting() {
    // The destructor must be called outside of the allocation path. Therefore,
    // it is safe to verify with CHECK.

    // All accessing threads must be terminated by now. For additional safety
    // we tear down the TLS system first. This way we ensure that
    // MarkSlotAsFree is not called anymore and we have no accesses from the
    // TLS system's side.
    CHECK(dereference(tls_system_).TearDownForTesting());

    // Delete all data chunks.
    for (auto* chunk = root_.load(); chunk != nullptr;) {
      auto* next_chunk = chunk->next_chunk.load();
      FreeAndDeallocateChunkForTesting(chunk);
      chunk = next_chunk;
    }
  }

  // Reset a single item to its default value.
  // Since items are re-used, they may be accessed from different threads,
  // causing TSan to trigger. Therefore, the reset is exempt from TSan
  // instrumentation.
  DISABLE_TSAN_INSTRUMENTATION void Reset(PayloadType& item) { item = {}; }

  AllocatorType allocator_;
  TLSSystemType tls_system_;
  std::atomic<Chunk*> const root_;
};

}  // namespace internal

// The ThreadLocalStorage visible to the user. This uses the internal default
// allocator and TLS system.
template <typename StorageType,
          typename AllocatorType = internal::DefaultAllocator,
          typename TLSSystemType = internal::DefaultTLSSystem,
          size_t AllocationChunkSize = AllocatorType::AllocationChunkSize,
          bool IsDestructibleForTesting = false>
using ThreadLocalStorage =
    internal::ThreadLocalStorage<StorageType,
                                 AllocatorType,
                                 TLSSystemType,
                                 AllocationChunkSize,
                                 IsDestructibleForTesting>;
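
// A hypothetical usage sketch (MyThreadData and the instance id are
// illustrative, not part of this header):
//
//   struct MyThreadData {
//     int reentrancy_depth = 0;
//   };
//
//   ThreadLocalStorage<MyThreadData> storage("my_component");
//   MyThreadData* const data = storage.GetThreadLocalData();
//   ++data->reentrancy_depth;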

}  // namespace base::allocator::dispatcher

#undef TLS_RAW_CHECK_IMPL
#undef TLS_RAW_CHECK
#undef STR
#undef STR_HELPER

#endif  // USE_LOCAL_TLS_EMULATION()
#endif  // BASE_ALLOCATOR_DISPATCHER_TLS_H_