// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef BASE_ALLOCATOR_DISPATCHER_TLS_H_
#define BASE_ALLOCATOR_DISPATCHER_TLS_H_

#include <string_view>

#include "build/build_config.h"

#if BUILDFLAG(IS_POSIX)  // the current allocation mechanism (mmap) and TLS
                         // support (pthread) are both defined by POSIX
#define USE_LOCAL_TLS_EMULATION() true
#else
#define USE_LOCAL_TLS_EMULATION() false
#endif

#if USE_LOCAL_TLS_EMULATION()
#include <algorithm>
#include <atomic>
#include <memory>
#include <mutex>

#include "base/base_export.h"
#include "base/check.h"
#include "base/compiler_specific.h"
#include "partition_alloc/partition_alloc_constants.h"

#include <pthread.h>

#if HAS_FEATURE(thread_sanitizer)
#define DISABLE_TSAN_INSTRUMENTATION __attribute__((no_sanitize("thread")))
#else
#define DISABLE_TSAN_INSTRUMENTATION
#endif

#define STR_HELPER(x) #x
#define STR(x) STR_HELPER(x)

// Verify that a condition holds and terminate the process in case it doesn't.
// The functionality is similar to RAW_CHECK but includes more information in
// the logged messages. It is non-allocating to prevent recursion.
#define TLS_RAW_CHECK(error_message, condition) \
  TLS_RAW_CHECK_IMPL(error_message, condition, __FILE__, __LINE__)

#define TLS_RAW_CHECK_IMPL(error_message, condition, file, line)        \
  do {                                                                  \
    if (!(condition)) {                                                 \
      constexpr const char* message =                                   \
          "TLS System: " error_message " Failed condition '" #condition \
          "' in (" file "@" STR(line) ").\n";                           \
      ::logging::RawCheckFailure(message);                              \
    }                                                                   \
  } while (0)
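
// Illustrative usage (these exact call sites appear later in this file):
//
//   TLS_RAW_CHECK("Failed to set thread specific data.",
//                 tls_system.SetThreadSpecificData(slot));
//
// If the condition is false, the macro logs the message together with the
// stringified condition and the file/line location, then terminates via
// ::logging::RawCheckFailure, all without allocating.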

namespace base::debug {
struct CrashKeyString;
}

namespace base::allocator::dispatcher {
namespace internal {

// Allocate memory using POSIX mmap/munmap functionality. The allocator
// implements the allocator interface required by ThreadLocalStorage.
struct BASE_EXPORT MMapAllocator {
// The minimum size of a memory chunk when allocating. Even for chunks with
// fewer bytes, at least AllocationChunkSize bytes are allocated. For mmap,
// this is usually the page size of the system.
// For various OS-CPU combinations, partition_alloc::PartitionPageSize() is
// not constexpr. Hence, we cannot use this value but define it locally.
#if defined(PAGE_ALLOCATOR_CONSTANTS_ARE_CONSTEXPR) && \
    PAGE_ALLOCATOR_CONSTANTS_ARE_CONSTEXPR
  constexpr static size_t AllocationChunkSize =
      partition_alloc::PartitionPageSize();
#elif BUILDFLAG(IS_APPLE)
  constexpr static size_t AllocationChunkSize = 16384;
#elif BUILDFLAG(IS_ANDROID) && defined(ARCH_CPU_64_BITS)
  constexpr static size_t AllocationChunkSize = 16384;
#elif BUILDFLAG(IS_LINUX) && defined(ARCH_CPU_ARM64)
  constexpr static size_t AllocationChunkSize = 16384;
#else
  constexpr static size_t AllocationChunkSize = 4096;
#endif

  // Allocate size_in_bytes bytes of raw memory. Return nullptr if allocation
  // fails.
  void* AllocateMemory(size_t size_in_bytes);
  // Free the raw memory pointed to by pointer_to_allocated. Returns a boolean
  // value indicating if the free was successful.
  bool FreeMemoryForTesting(void* pointer_to_allocated, size_t size_in_bytes);
};
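
// A minimal sketch of how AllocateMemory could be implemented on top of mmap
// (illustrative only and an assumption; the actual definition lives in the
// corresponding implementation file):
//
//   void* MMapAllocator::AllocateMemory(size_t size_in_bytes) {
//     void* const memory =
//         mmap(nullptr, size_in_bytes, PROT_READ | PROT_WRITE,
//              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
//     return (memory != MAP_FAILED) ? memory : nullptr;
//   }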

// The allocator used by default for the thread local storage.
using DefaultAllocator = MMapAllocator;

using OnThreadTerminationFunction = void (*)(void*);

// The TLS system used by default for the thread local storage. It stores and
// retrieves thread-specific data pointers.
class BASE_EXPORT PThreadTLSSystem {
 public:
  PThreadTLSSystem();

  PThreadTLSSystem(const PThreadTLSSystem&) = delete;
  PThreadTLSSystem(PThreadTLSSystem&&);
  PThreadTLSSystem& operator=(const PThreadTLSSystem&) = delete;
  PThreadTLSSystem& operator=(PThreadTLSSystem&&);

  // Initialize the TLS system to store a data set for different threads.
  // @param thread_termination_function An optional function which will be
  // invoked upon termination of a thread.
  bool Setup(OnThreadTerminationFunction thread_termination_function,
             const std::string_view instance_id);
  // Tear down the TLS system. After completing tear down, the thread
  // termination function passed to Setup will not be invoked anymore.
  bool TearDownForTesting();

  // Get the pointer to the data associated with the current thread. Returns
  // nullptr if the TLS system is not initialized or no data was set before.
  void* GetThreadSpecificData();
  // Set the pointer to the data associated with the current thread. Return
  // true if stored successfully, false otherwise.
  bool SetThreadSpecificData(void* data);

 private:
  base::debug::CrashKeyString* crash_key_ = nullptr;
  pthread_key_t data_access_key_ = 0;
#if DCHECK_IS_ON()
  // From the POSIX standard at
  // https://www.open-std.org/jtc1/sc22/open/n4217.pdf:
  // The effect of calling pthread_getspecific() or pthread_setspecific() with
  // a key value not obtained from pthread_key_create() or after key has been
  // deleted with pthread_key_delete() is undefined.
  //
  // Unfortunately, POSIX doesn't define a special value of pthread_key_t
  // indicating an invalid key which would allow us to detect accesses outside
  // of initialized state. Hence, to prevent us from drifting into the evil
  // realm of undefined behaviour we store whether we're somewhere between
  // Setup and Teardown.
  std::atomic_bool initialized_{false};
#endif
};

using DefaultTLSSystem = PThreadTLSSystem;

// In some scenarios, most notably when testing, the allocator and TLS system
// passed to |ThreadLocalStorage| are not copyable and have to be wrapped,
// e.g. using std::reference_wrapper. |dereference| is a small helper to
// retrieve the underlying value.
template <typename T>
T& dereference(T& ref) {
  return ref;
}

template <typename T>
T& dereference(std::reference_wrapper<T>& ref) {
  // std::reference_wrapper requires a valid reference for construction,
  // therefore, there is no need to check here.
  return ref.get();
}
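
// Illustrative example (hypothetical local names): both overloads let callers
// treat plain and wrapped instances uniformly.
//
//   PThreadTLSSystem tls_system;
//   std::reference_wrapper<PThreadTLSSystem> wrapped(tls_system);
//
//   dereference(tls_system).GetThreadSpecificData();  // plain reference
//   dereference(wrapped).GetThreadSpecificData();     // unwrapped reference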

// Store thread local data. The data is organized in chunks, where each chunk
// holds |ItemsPerChunk| items. Each item may be free or used.
//
// When a thread requests data, the chunks are searched for a free data item,
// which is registered for this thread and marked as |used|. Further requests
// by this thread will then always return the same item. When a thread
// terminates, the item will be reset and returned to the pool of free items.
//
// Upon construction, the first chunk is created. If a thread requests data and
// there is no free item available, another chunk is created. Upon destruction,
// all memory is freed. Pointers to data items become invalid!
//
// Constructor and destructor are not thread safe.
//
// @tparam PayloadType The item type to be stored.
// @tparam AllocatorType The allocator being used. An allocator must provide
// the following interface:
//  void* AllocateMemory(size_t size_in_bytes); // Allocate size_in_bytes bytes
//  of raw memory.
//  void FreeMemory(void* pointer_to_allocated, size_t size_in_bytes); // Free
//  the raw memory pointed to by pointer_to_allocated.
// Any failure in allocation or free must terminate the process.
// @tparam TLSSystemType The TLS system being used. A TLS system must provide
// the following interface:
//  bool Setup(OnThreadTerminationFunction thread_termination_function);
//  bool Destroy();
//  void* GetThreadSpecificData();
//  bool SetThreadSpecificData(void* data);
// @tparam AllocationChunkSize The minimum size of a memory chunk that the
// allocator can handle. We try to size the chunks so that each chunk uses as
// much of this size as possible.
// @tparam IsDestructibleForTesting For testing purposes we allow the
// destructor to perform clean up upon destruction. Otherwise, using the
// destructor will result in a compilation failure.
template <typename PayloadType,
          typename AllocatorType,
          typename TLSSystemType,
          size_t AllocationChunkSize,
          bool IsDestructibleForTesting>
struct ThreadLocalStorage {
  explicit ThreadLocalStorage(const std::string_view instance_id)
      : root_(AllocateAndInitializeChunk()) {
    Initialize(instance_id);
  }

  // Create a new instance of |ThreadLocalStorage| using the passed allocator
  // and TLS system. This initializes the underlying TLS system and creates the
  // first chunk of data.
  ThreadLocalStorage(const std::string_view instance_id,
                     AllocatorType allocator,
                     TLSSystemType tls_system)
      : allocator_(std::move(allocator)),
        tls_system_(std::move(tls_system)),
        root_(AllocateAndInitializeChunk()) {
    Initialize(instance_id);
  }

  // Deletes an instance of |ThreadLocalStorage| and deletes all the data
  // chunks created.
  ~ThreadLocalStorage() {
    if constexpr (IsDestructibleForTesting) {
      TearDownForTesting();
    } else if constexpr (!IsDestructibleForTesting) {
      static_assert(
          IsDestructibleForTesting,
          "ThreadLocalStorage cannot be destructed outside of test code.");
    }
  }

  // Explicitly prevent all forms of Copy/Move construction/assignment. For an
  // exact copy of ThreadLocalStorage we would need to copy the mapping of
  // thread to item, which we can't do at the moment. On the other hand, our
  // atomic members do not support moving out of the box.
  ThreadLocalStorage(const ThreadLocalStorage&) = delete;
  ThreadLocalStorage(ThreadLocalStorage&& other) = delete;
  ThreadLocalStorage& operator=(const ThreadLocalStorage&) = delete;
  ThreadLocalStorage& operator=(ThreadLocalStorage&&) = delete;

  // Get the data item for the current thread. If no data is registered so far,
  // find a free item in the chunks and register it for the current thread.
  PayloadType* GetThreadLocalData() {
    auto& tls_system = dereference(tls_system_);

    auto* slot = static_cast<SingleSlot*>(tls_system.GetThreadSpecificData());

    if (UNLIKELY(slot == nullptr)) {
      slot = FindAndAllocateFreeSlot(root_.load(std::memory_order_relaxed));

      // We might be called in the course of handling a memory allocation. We
      // do not use CHECK since it might allocate and cause recursion.
      TLS_RAW_CHECK("Failed to set thread specific data.",
                    tls_system.SetThreadSpecificData(slot));

      // Reset the content to wipe out any previous data.
      Reset(slot->item);
    }

    return &(slot->item);
  }

 private:
  // Encapsulate the payload item and some administrative data.
  struct SingleSlot {
    PayloadType item;
#if !defined(__cpp_lib_atomic_value_initialization) || \
    __cpp_lib_atomic_value_initialization < 201911L
    std::atomic_flag is_used = ATOMIC_FLAG_INIT;
#else
    std::atomic_flag is_used;
#endif
  };

  template <size_t NumberOfItems>
  struct ChunkT {
    SingleSlot slots[NumberOfItems];
    // Pointer to the next chunk.
    std::atomic<ChunkT*> next_chunk = nullptr;
    // Helper flag to ensure we create the next chunk only once in a multi
    // threaded environment.
    std::once_flag create_next_chunk_flag;
  };

  template <size_t LowerNumberOfItems,
            size_t UpperNumberOfItems,
            size_t NumberOfBytes>
  static constexpr size_t CalculateEffectiveNumberOfItemsBinSearch() {
    if constexpr (LowerNumberOfItems == UpperNumberOfItems) {
      return LowerNumberOfItems;
    }

    constexpr size_t CurrentNumberOfItems =
        (UpperNumberOfItems - LowerNumberOfItems) / 2 + LowerNumberOfItems;

    if constexpr (sizeof(ChunkT<CurrentNumberOfItems>) > NumberOfBytes) {
      return CalculateEffectiveNumberOfItemsBinSearch<
          LowerNumberOfItems, CurrentNumberOfItems, NumberOfBytes>();
    }

    if constexpr (sizeof(ChunkT<CurrentNumberOfItems + 1>) < NumberOfBytes) {
      return CalculateEffectiveNumberOfItemsBinSearch<
          CurrentNumberOfItems + 1, UpperNumberOfItems, NumberOfBytes>();
    }

    return CurrentNumberOfItems;
  }

  // Calculate the maximum number of items we can store in one chunk without
  // the size of the chunk exceeding NumberOfBytes. To avoid things like
  // alignment and packing tampering with the calculation, instead of
  // calculating the correct number of items we use the sizeof operator
  // against ChunkT to search for the correct size. Unfortunately, the number
  // of recursions is limited by the compiler. Therefore, we use a binary
  // search instead of a simple linear search.
  template <size_t MinimumNumberOfItems, size_t NumberOfBytes>
  static constexpr size_t CalculateEffectiveNumberOfItems() {
    if constexpr (sizeof(ChunkT<MinimumNumberOfItems>) < NumberOfBytes) {
      constexpr size_t LowerNumberOfItems = MinimumNumberOfItems;
      constexpr size_t UpperNumberOfItems =
          NumberOfBytes / sizeof(PayloadType) + 1;
      return CalculateEffectiveNumberOfItemsBinSearch<
          LowerNumberOfItems, UpperNumberOfItems, NumberOfBytes>();
    }

    return MinimumNumberOfItems;
  }

 public:
  // The minimum number of items per chunk. It should be high enough to
  // accommodate most items in the root chunk whilst not wasting too much
  // space on unnecessary items.
  static constexpr size_t MinimumNumberOfItemsPerChunk = 75;
  // The effective number of items per chunk. We use the AllocationChunkSize
  // as a hint to calculate the effective number of items so we occupy one of
  // these memory chunks to the maximum extent possible.
  static constexpr size_t ItemsPerChunk =
      CalculateEffectiveNumberOfItems<MinimumNumberOfItemsPerChunk,
                                      AllocationChunkSize>();
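
  // Illustrative arithmetic (hypothetical numbers; the real value depends on
  // sizeof(PayloadType), alignment, and the standard library): with
  // AllocationChunkSize = 4096 and sizeof(SingleSlot) == 40, the binary
  // search converges on the largest N for which
  //   sizeof(ChunkT<N>), i.e. N * sizeof(SingleSlot) plus the overhead of
  //   next_chunk, create_next_chunk_flag and padding,
  // still fits into the 4096-byte budget, giving roughly 4096 / 40 = ~102
  // slots per chunk.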

 private:
  using Chunk = ChunkT<ItemsPerChunk>;

  static_assert(ItemsPerChunk >= MinimumNumberOfItemsPerChunk);

  // Mark an item's slot ready for reuse. This function is used as thread
  // termination function in the TLS system. We do not destroy anything at
  // this point but simply mark the slot as unused.
  static void MarkSlotAsFree(void* data) {
    // We always store SingleSlots in the TLS system. Therefore, we cast to
    // SingleSlot and reset the is_used flag.
    auto* const slot = static_cast<SingleSlot*>(data);

    // We might be called in the course of handling a memory allocation.
    // Therefore, do not use CHECK since it might allocate and cause
    // recursion.
    TLS_RAW_CHECK("Received an invalid slot.",
                  slot && slot->is_used.test_and_set());

    slot->is_used.clear(std::memory_order_relaxed);
  }

  // Perform common initialization during construction of an instance.
  void Initialize(const std::string_view instance_id) {
    // The constructor must be called outside of the allocation path.
    // Therefore, it is safe to verify with CHECK.

    // By passing MarkSlotAsFree as thread_termination_function we ensure the
    // slot/item assigned to the finished thread will be returned to the pool
    // of unused items.
    CHECK(dereference(tls_system_).Setup(&MarkSlotAsFree, instance_id));
  }

  Chunk* AllocateAndInitializeChunk() {
    void* const uninitialized_memory =
        dereference(allocator_).AllocateMemory(sizeof(Chunk));

    // We might be called in the course of handling a memory allocation. We do
    // not use CHECK since it might allocate and cause recursion.
    TLS_RAW_CHECK("Failed to allocate memory for new chunk.",
                  uninitialized_memory != nullptr);

    return new (uninitialized_memory) Chunk{};
  }

  void FreeAndDeallocateChunkForTesting(Chunk* chunk_to_erase) {
    chunk_to_erase->~Chunk();

    // FreeAndDeallocateChunkForTesting must be called outside of the
    // allocation path. Therefore, it is safe to verify with CHECK.
    CHECK(dereference(allocator_)
              .FreeMemoryForTesting(chunk_to_erase, sizeof(Chunk)));
  }

  // Find a free slot in the passed chunk, reserve it and return it to the
  // caller. If no free slot can be found, head on to the next chunk. If the
  // next chunk doesn't exist, create it.
  SingleSlot* FindAndAllocateFreeSlot(Chunk* const chunk) {
    SingleSlot* const slot = std::find_if_not(
        std::begin(chunk->slots), std::end(chunk->slots),
        [](SingleSlot& candidate_slot) {
          return candidate_slot.is_used.test_and_set(std::memory_order_relaxed);
        });

    // So we found a slot. Happily return it to the caller.
    if (slot != std::end(chunk->slots)) {
      return slot;
    }

    // Ok, there are no more free slots in this chunk. First, ensure the next
    // chunk is valid and create one if necessary.
    std::call_once(chunk->create_next_chunk_flag, [&] {
      // From https://eel.is/c++draft/thread.once.callonce#3
      //
      // Synchronization: For any given once_flag: all active executions occur
      // in a total order; completion of an active execution synchronizes with
      // the start of the next one in this total order; and the returning
      // execution synchronizes with the return from all passive executions.
      //
      // Therefore, we do only a relaxed store here; call_once synchronizes
      // with other threads.
      chunk->next_chunk.store(AllocateAndInitializeChunk(),
                              std::memory_order_relaxed);
    });

    return FindAndAllocateFreeSlot(chunk->next_chunk);
  }

  template <bool IsDestructibleForTestingP = IsDestructibleForTesting>
  typename std::enable_if<IsDestructibleForTestingP>::type
  TearDownForTesting() {
    // The destructor must be called outside of the allocation path. Therefore,
    // it is safe to verify with CHECK.

    // All accessing threads must be terminated by now. For additional safety
    // we tear down the TLS system first. This way we ensure that
    // MarkSlotAsFree is not called anymore and we have no accesses from the
    // TLS system's side.
    CHECK(dereference(tls_system_).TearDownForTesting());

    // Delete all data chunks.
    for (auto* chunk = root_.load(); chunk != nullptr;) {
      auto* next_chunk = chunk->next_chunk.load();
      FreeAndDeallocateChunkForTesting(chunk);
      chunk = next_chunk;
    }
  }

  // Reset a single item to its default value.
  // Since items are re-used, they may be accessed from different threads,
  // causing TSan to trigger. Therefore, the reset is exempt from TSan
  // instrumentation.
  DISABLE_TSAN_INSTRUMENTATION void Reset(PayloadType& item) { item = {}; }

  AllocatorType allocator_;
  TLSSystemType tls_system_;
  std::atomic<Chunk*> const root_;
};

}  // namespace internal

// The ThreadLocalStorage visible to the user. This uses the internal default
// allocator and TLS system.
template <typename StorageType,
          typename AllocatorType = internal::DefaultAllocator,
          typename TLSSystemType = internal::DefaultTLSSystem,
          size_t AllocationChunkSize = AllocatorType::AllocationChunkSize,
          bool IsDestructibleForTesting = false>
using ThreadLocalStorage =
    internal::ThreadLocalStorage<StorageType,
                                 AllocatorType,
                                 TLSSystemType,
                                 AllocationChunkSize,
                                 IsDestructibleForTesting>;
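
// Example usage (illustrative; the type and names below are hypothetical):
//
//   struct ThreadData {
//     int allocation_depth = 0;
//   };
//
//   ThreadLocalStorage<ThreadData> storage("my_instance_id");
//
//   void OnAllocation() {
//     ThreadData* const data = storage.GetThreadLocalData();
//     ++data->allocation_depth;
//   }
//
// GetThreadLocalData() lazily reserves a slot for the calling thread on first
// use and returns the same item on every subsequent call from that thread.
// When the thread terminates, its slot is marked free and may be handed to a
// new thread (after being reset to a default-constructed PayloadType).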

}  // namespace base::allocator::dispatcher

#undef TLS_RAW_CHECK_IMPL
#undef TLS_RAW_CHECK
#undef STR
#undef STR_HELPER

#endif  // USE_LOCAL_TLS_EMULATION()
#endif  // BASE_ALLOCATOR_DISPATCHER_TLS_H_