// Copyright 2020 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "partition_alloc/partition_root.h"

#include <cstdint>

#include "build/build_config.h"
#include "partition_alloc/freeslot_bitmap.h"
#include "partition_alloc/in_slot_metadata.h"
#include "partition_alloc/oom.h"
#include "partition_alloc/page_allocator.h"
#include "partition_alloc/partition_address_space.h"
#include "partition_alloc/partition_alloc-inl.h"
#include "partition_alloc/partition_alloc_base/bits.h"
#include "partition_alloc/partition_alloc_base/compiler_specific.h"
#include "partition_alloc/partition_alloc_base/component_export.h"
#include "partition_alloc/partition_alloc_base/debug/debugging_buildflags.h"
#include "partition_alloc/partition_alloc_base/thread_annotations.h"
#include "partition_alloc/partition_alloc_buildflags.h"
#include "partition_alloc/partition_alloc_check.h"
#include "partition_alloc/partition_alloc_config.h"
#include "partition_alloc/partition_alloc_constants.h"
#include "partition_alloc/partition_bucket.h"
#include "partition_alloc/partition_cookie.h"
#include "partition_alloc/partition_oom.h"
#include "partition_alloc/partition_page.h"
#include "partition_alloc/reservation_offset_table.h"
#include "partition_alloc/tagging.h"
#include "partition_alloc/thread_isolation/thread_isolation.h"

#if BUILDFLAG(IS_MAC)
#include "partition_alloc/partition_alloc_base/mac/mac_util.h"
#endif

#if BUILDFLAG(USE_STARSCAN)
#include "partition_alloc/starscan/pcscan.h"
#endif

#if !BUILDFLAG(HAS_64_BIT_POINTERS)
#include "partition_alloc/address_pool_manager_bitmap.h"
#endif

#if BUILDFLAG(IS_WIN)
#include <windows.h>

#include "wow64apiset.h"
#endif

#if BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_CHROMEOS)
#include <pthread.h>
#endif

namespace partition_alloc::internal {

#if BUILDFLAG(RECORD_ALLOC_INFO)
// Even if this were not hidden behind a BUILDFLAG, it would not use any memory
// when recording is disabled, since it ends up in the .bss section.
AllocInfo g_allocs = {};

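// Records the allocation (or free) in a fixed-size ring buffer: the relaxed
// atomic counter wraps modulo kAllocInfoSize, so once the buffer is full the
// oldest entries are overwritten.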
void RecordAllocOrFree(uintptr_t addr, size_t size) {
  g_allocs.allocs[g_allocs.index.fetch_add(1, std::memory_order_relaxed) %
                  kAllocInfoSize] = {addr, size};
}
#endif  // BUILDFLAG(RECORD_ALLOC_INFO)

#if BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT)
PtrPosWithinAlloc IsPtrWithinSameAlloc(uintptr_t orig_address,
                                       uintptr_t test_address,
                                       size_t type_size) {
  PA_DCHECK(IsManagedByNormalBucketsOrDirectMap(orig_address));
  DCheckIfManagedByPartitionAllocBRPPool(orig_address);

  auto [slot_start, _] =
      PartitionAllocGetSlotStartAndSizeInBRPPool(orig_address);
  // Don't use |orig_address| beyond this point at all. It was needed to
  // pick the right slot, but now we're dealing with very concrete addresses.
  // Zero it just in case, to catch errors.
  orig_address = 0;

  auto* slot_span = SlotSpanMetadata::FromSlotStart(slot_start);
  auto* root = PartitionRoot::FromSlotSpanMetadata(slot_span);
  // Double check that in-slot metadata is indeed present. Currently that's the
  // case only when BRP is used.
  PA_DCHECK(root->brp_enabled());

  uintptr_t object_addr = root->SlotStartToObjectAddr(slot_start);
  uintptr_t object_end = object_addr + root->GetSlotUsableSize(slot_span);
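  // |test_address| is now classified relative to [object_addr, object_end]:
  //  - kFarOOB:   outside that range entirely;
  //  - kAllocEnd: in (object_end - type_size, object_end], i.e. too close to
  //               the end for even one element of the pointee type to fit
  //               (only when BACKUP_REF_PTR_POISON_OOB_PTR is enabled);
  //  - kInBounds: in [object_addr, object_end - type_size].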
  if (test_address < object_addr || object_end < test_address) {
    return PtrPosWithinAlloc::kFarOOB;
#if BUILDFLAG(BACKUP_REF_PTR_POISON_OOB_PTR)
  } else if (object_end - type_size < test_address) {
    // Not even a single element of the type referenced by the pointer can fit
    // between the pointer and the end of the object.
    return PtrPosWithinAlloc::kAllocEnd;
#endif
  } else {
    return PtrPosWithinAlloc::kInBounds;
  }
}
#endif  // BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT)

}  // namespace partition_alloc::internal

namespace partition_alloc {

#if PA_CONFIG(USE_PARTITION_ROOT_ENUMERATOR)

namespace {
internal::Lock g_root_enumerator_lock;
}

internal::Lock& PartitionRoot::GetEnumeratorLock() {
  return g_root_enumerator_lock;
}

namespace internal {

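// Keeps track of every PartitionRoot in an intrusive doubly-linked list,
// guarded by the enumerator lock. The fork handlers below use it to acquire
// (and later release or re-initialize) the lock of every registered root.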
class PartitionRootEnumerator {
 public:
  using EnumerateCallback = void (*)(PartitionRoot* root, bool in_child);
  enum EnumerateOrder {
    kNormal,
    kReverse,
  };

  static PartitionRootEnumerator& Instance() {
    static PartitionRootEnumerator instance;
    return instance;
  }

  void Enumerate(EnumerateCallback callback,
                 bool in_child,
                 EnumerateOrder order) PA_NO_THREAD_SAFETY_ANALYSIS {
    if (order == kNormal) {
      PartitionRoot* root;
      for (root = Head(partition_roots_); root != nullptr;
           root = root->next_root) {
        callback(root, in_child);
      }
    } else {
      PA_DCHECK(order == kReverse);
      PartitionRoot* root;
      for (root = Tail(partition_roots_); root != nullptr;
           root = root->prev_root) {
        callback(root, in_child);
      }
    }
  }

  void Register(PartitionRoot* root) {
    internal::ScopedGuard guard(PartitionRoot::GetEnumeratorLock());
    root->next_root = partition_roots_;
    root->prev_root = nullptr;
    if (partition_roots_) {
      partition_roots_->prev_root = root;
    }
    partition_roots_ = root;
  }

  void Unregister(PartitionRoot* root) {
    internal::ScopedGuard guard(PartitionRoot::GetEnumeratorLock());
    PartitionRoot* prev = root->prev_root;
    PartitionRoot* next = root->next_root;
    if (prev) {
      PA_DCHECK(prev->next_root == root);
      prev->next_root = next;
    } else {
      PA_DCHECK(partition_roots_ == root);
      partition_roots_ = next;
    }
    if (next) {
      PA_DCHECK(next->prev_root == root);
      next->prev_root = prev;
    }
    root->next_root = nullptr;
    root->prev_root = nullptr;
  }

 private:
  constexpr PartitionRootEnumerator() = default;

  PartitionRoot* Head(PartitionRoot* roots) { return roots; }

  PartitionRoot* Tail(PartitionRoot* roots) PA_NO_THREAD_SAFETY_ANALYSIS {
    if (!roots) {
      return nullptr;
    }
    PartitionRoot* node = roots;
    for (; node->next_root != nullptr; node = node->next_root)
      ;
    return node;
  }

  PartitionRoot* partition_roots_
      PA_GUARDED_BY(PartitionRoot::GetEnumeratorLock()) = nullptr;
};

}  // namespace internal

#endif  // PA_CONFIG(USE_PARTITION_ROOT_ENUMERATOR)

#if BUILDFLAG(USE_PARTITION_ALLOC_AS_MALLOC)

namespace {

#if PA_CONFIG(HAS_ATFORK_HANDLER)

void LockRoot(PartitionRoot* root, bool) PA_NO_THREAD_SAFETY_ANALYSIS {
  PA_DCHECK(root);
  internal::PartitionRootLock(root).Acquire();
}

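// Acquires, in this order: g_root_enumerator_lock, every registered root's
// lock (in list order), and finally the ThreadCacheRegistry lock.
// ReleaseLocks() below undoes this in reverse order, or re-initializes the
// locks instead of releasing them when running in the child process.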
// PA_NO_THREAD_SAFETY_ANALYSIS: acquires the lock and doesn't release it, by
// design.
void BeforeForkInParent() PA_NO_THREAD_SAFETY_ANALYSIS {
  // PartitionRoot::GetLock() is private. So use
  // g_root_enumerator_lock here.
  g_root_enumerator_lock.Acquire();
  internal::PartitionRootEnumerator::Instance().Enumerate(
      LockRoot, false,
      internal::PartitionRootEnumerator::EnumerateOrder::kNormal);

  ThreadCacheRegistry::GetLock().Acquire();
}

template <typename T>
void UnlockOrReinit(T& lock, bool in_child) PA_NO_THREAD_SAFETY_ANALYSIS {
  // Only re-init the locks in the child process; the parent can unlock
  // normally.
  if (in_child) {
    lock.Reinit();
  } else {
    lock.Release();
  }
}

void UnlockOrReinitRoot(PartitionRoot* root,
                        bool in_child) PA_NO_THREAD_SAFETY_ANALYSIS {
  UnlockOrReinit(internal::PartitionRootLock(root), in_child);
}

void ReleaseLocks(bool in_child) PA_NO_THREAD_SAFETY_ANALYSIS {
  // In reverse order, even though there are no lock ordering dependencies.
  UnlockOrReinit(ThreadCacheRegistry::GetLock(), in_child);
  internal::PartitionRootEnumerator::Instance().Enumerate(
      UnlockOrReinitRoot, in_child,
      internal::PartitionRootEnumerator::EnumerateOrder::kReverse);

  // PartitionRoot::GetLock() is private. So use
  // g_root_enumerator_lock here.
  UnlockOrReinit(g_root_enumerator_lock, in_child);
}

void AfterForkInParent() {
  ReleaseLocks(/* in_child = */ false);
}

void AfterForkInChild() {
  ReleaseLocks(/* in_child = */ true);
  // Unsafe, as noted in the name. This is fine here however, since at this
  // point there is only one thread, this one (unless another post-fork()
  // handler created a thread, but it would have needed to allocate, which would
  // have deadlocked the process already).
  //
  // If we don't reclaim this memory, it is lost forever. Note that this is only
  // really an issue if we fork() a multi-threaded process without calling
  // exec() right away, which is discouraged.
  ThreadCacheRegistry::Instance().ForcePurgeAllThreadAfterForkUnsafe();
}
#endif  // PA_CONFIG(HAS_ATFORK_HANDLER)

std::atomic<bool> g_global_init_called;
void PartitionAllocMallocInitOnce() {
  bool expected = false;
  // No need to block execution for potential concurrent initialization; we
  // merely want to make sure this is only called once.
  if (!g_global_init_called.compare_exchange_strong(expected, true)) {
    return;
  }

#if BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_CHROMEOS)
  // When fork() is called, only the current thread continues to execute in the
  // child process. If the lock is held, but *not* by this thread when fork() is
  // called, we have a deadlock.
  //
  // The "solution" here is to acquire the lock on the forking thread before
  // fork(), and keep it held until fork() is done, in the parent and the
  // child. To clean up memory, we also must empty the thread caches in the
  // child, which is easier, since no threads except for the current one are
  // running right after the fork().
  //
  // This is not perfect though, since:
  // - Multiple pre/post-fork() handlers can be registered; they are then run in
  //   LIFO order for the pre-fork handlers, and FIFO order for the post-fork
  //   ones. So unless we are the first to register a handler, if another
  //   handler allocates, then we deterministically deadlock.
  // - pthread handlers are *not* called when the application calls clone()
  //   directly, which is what Chrome does to launch processes.
  //
  // No perfect solution exists to make threads and fork() cooperate, but
  // deadlocks are real (and fork() is used in DEATH_TEST()s), and other
  // malloc() implementations use the same techniques.
  int err =
      pthread_atfork(BeforeForkInParent, AfterForkInParent, AfterForkInChild);
  PA_CHECK(err == 0);
#endif  // BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_CHROMEOS)
}

}  // namespace

#if BUILDFLAG(IS_APPLE)
void PartitionAllocMallocHookOnBeforeForkInParent() {
  BeforeForkInParent();
}

void PartitionAllocMallocHookOnAfterForkInParent() {
  AfterForkInParent();
}

void PartitionAllocMallocHookOnAfterForkInChild() {
  AfterForkInChild();
}
#endif  // BUILDFLAG(IS_APPLE)

#endif  // BUILDFLAG(USE_PARTITION_ALLOC_AS_MALLOC)

namespace internal {

namespace {
// 64 was chosen arbitrarily, as it seems like a reasonable trade-off between
// performance and purging opportunity. A higher value (i.e. smaller slots)
// wouldn't necessarily increase the chances of purging, but would result in
// more work and a larger |slot_usage| array. A lower value would probably
// decrease the chances of purging. Not empirically tested.
constexpr size_t kMaxPurgeableSlotsPerSystemPage = 64;
PA_ALWAYS_INLINE PAGE_ALLOCATOR_CONSTANTS_DECLARE_CONSTEXPR size_t
MinPurgeableSlotSize() {
  return SystemPageSize() / kMaxPurgeableSlotsPerSystemPage;
}
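// For example, with a 4 KiB system page, MinPurgeableSlotSize() is 64 bytes,
// so slot spans whose slots are smaller than 64 bytes are skipped by
// PartitionPurgeSlotSpan() below.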
}  // namespace

// The function attempts to unprovision unused slots and discard unused pages.
// It may also "straighten" the free list.
//
// If `accounting_only` is set to true, no action is performed and the function
// merely returns the number of bytes in the would-be discarded pages.
static size_t PartitionPurgeSlotSpan(PartitionRoot* root,
                                     internal::SlotSpanMetadata* slot_span,
                                     bool accounting_only)
    PA_EXCLUSIVE_LOCKS_REQUIRED(internal::PartitionRootLock(root)) {
  const internal::PartitionBucket* bucket = slot_span->bucket;
  size_t slot_size = bucket->slot_size;

  if (slot_size < MinPurgeableSlotSize() || !slot_span->num_allocated_slots) {
    return 0;
  }

  size_t bucket_num_slots = bucket->get_slots_per_span();
  size_t discardable_bytes = 0;

  if (slot_span->CanStoreRawSize()) {
    uint32_t utilized_slot_size = static_cast<uint32_t>(
        RoundUpToSystemPage(slot_span->GetUtilizedSlotSize()));
    discardable_bytes = bucket->slot_size - utilized_slot_size;
    if (discardable_bytes && !accounting_only) {
      uintptr_t slot_span_start =
          internal::SlotSpanMetadata::ToSlotSpanStart(slot_span);
      uintptr_t committed_data_end = slot_span_start + utilized_slot_size;
      ScopedSyscallTimer timer{root};
      DiscardSystemPages(committed_data_end, discardable_bytes);
    }
    return discardable_bytes;
  }

#if defined(PAGE_ALLOCATOR_CONSTANTS_ARE_CONSTEXPR)
  constexpr size_t kMaxSlotCount =
      (PartitionPageSize() * kMaxPartitionPagesPerRegularSlotSpan) /
      MinPurgeableSlotSize();
#elif BUILDFLAG(IS_APPLE) || \
    defined(PARTITION_ALLOCATOR_CONSTANTS_POSIX_NONCONST_PAGE_SIZE)
  // It's better for slot_usage to be stack-allocated and fixed-size, which
  // demands that its size be constexpr. On IS_APPLE and Linux on arm64,
  // PartitionPageSize() is always SystemPageSize() << 2, so regardless of
  // what the run time page size is, kMaxSlotCount can always be simplified
  // to this expression.
  constexpr size_t kMaxSlotCount =
      4 * kMaxPurgeableSlotsPerSystemPage *
      internal::kMaxPartitionPagesPerRegularSlotSpan;
  PA_CHECK(kMaxSlotCount == (PartitionPageSize() *
                             internal::kMaxPartitionPagesPerRegularSlotSpan) /
                                MinPurgeableSlotSize());
#endif
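  // For instance, assuming a 4 KiB system page (so PartitionPageSize() is
  // 16 KiB) and kMaxPartitionPagesPerRegularSlotSpan == 4 (its typical value),
  // both branches above yield kMaxSlotCount == (16 KiB * 4) / 64 B == 1024,
  // i.e. a 1 KiB |slot_usage| array on the stack.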
  PA_DCHECK(bucket_num_slots <= kMaxSlotCount);
  PA_DCHECK(slot_span->num_unprovisioned_slots < bucket_num_slots);
  size_t num_provisioned_slots =
      bucket_num_slots - slot_span->num_unprovisioned_slots;
  char slot_usage[kMaxSlotCount];
#if !BUILDFLAG(IS_WIN)
  // The last freelist entry should not be discarded on Windows, because
  // DiscardVirtualMemory makes the contents of discarded memory undefined.
  size_t last_slot = static_cast<size_t>(-1);
#endif
  memset(slot_usage, 1, num_provisioned_slots);
  uintptr_t slot_span_start = SlotSpanMetadata::ToSlotSpanStart(slot_span);
  // First, walk the freelist for this slot span and make a bitmap of which
  // slots are not in use.
  const PartitionFreelistDispatcher* freelist_dispatcher =
      root->get_freelist_dispatcher();

  for (PartitionFreelistEntry* entry = slot_span->get_freelist_head(); entry;
       entry = freelist_dispatcher->GetNext(entry, slot_size)) {
    size_t slot_number =
        bucket->GetSlotNumber(SlotStartPtr2Addr(entry) - slot_span_start);
    PA_DCHECK(slot_number < num_provisioned_slots);
    slot_usage[slot_number] = 0;
#if !BUILDFLAG(IS_WIN)
    // If we have a slot where the encoded next pointer is 0, we can actually
    // discard that entry because touching a discarded page is guaranteed to
    // return the original content or 0. (Note that this optimization won't be
    // effective on big-endian machines because the masking function is
    // negation.)
    if (freelist_dispatcher->IsEncodedNextPtrZero(entry)) {
      last_slot = slot_number;
    }
#endif
  }
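  // At this point, slot_usage[i] is 0 iff slot i is on the freelist (i.e.
  // free), and 1 iff it is currently allocated.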

  // If the slot(s) at the end of the slot span are not in use, we can truncate
  // them entirely and rewrite the freelist.
  size_t truncated_slots = 0;
  while (!slot_usage[num_provisioned_slots - 1]) {
    truncated_slots++;
    num_provisioned_slots--;
    PA_DCHECK(num_provisioned_slots);
  }
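  // |num_provisioned_slots| now points just past the last in-use slot, and
  // |truncated_slots| counts the trailing free slots whose pages are
  // candidates for unprovisioning.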
  // First, do the work of calculating the discardable bytes. Don't actually
  // discard anything if `accounting_only` is set.
  size_t unprovisioned_bytes = 0;
  uintptr_t begin_addr = slot_span_start + (num_provisioned_slots * slot_size);
  uintptr_t end_addr = begin_addr + (slot_size * truncated_slots);
  if (truncated_slots) {
    // The slots that do not contain discarded pages should not be included in
    // |truncated_slots|. Detect those slots and fix |truncated_slots| and
    // |num_provisioned_slots| accordingly.
    uintptr_t rounded_up_truncation_begin_addr =
        RoundUpToSystemPage(begin_addr);
    while (begin_addr + slot_size <= rounded_up_truncation_begin_addr) {
      begin_addr += slot_size;
      PA_DCHECK(truncated_slots);
      --truncated_slots;
      ++num_provisioned_slots;
    }
    begin_addr = rounded_up_truncation_begin_addr;

    // We round the end address here up and not down because we're at the end
    // of a slot span, so we "own" all the way up to the page boundary.
    end_addr = RoundUpToSystemPage(end_addr);
    PA_DCHECK(end_addr <= slot_span_start + bucket->get_bytes_per_span());
    if (begin_addr < end_addr) {
      unprovisioned_bytes = end_addr - begin_addr;
      discardable_bytes += unprovisioned_bytes;
    }
  }

  // If `accounting_only` isn't set, then take action to remove unprovisioned
  // slots from the free list (if any) and "straighten" the list (if
  // requested) to help reduce fragmentation in the future. Then
  // discard/decommit the pages hosting the unprovisioned slots.
  if (!accounting_only) {
    auto straighten_mode =
        PartitionRoot::GetStraightenLargerSlotSpanFreeListsMode();
    bool straighten =
        straighten_mode == StraightenLargerSlotSpanFreeListsMode::kAlways ||
        (straighten_mode ==
             StraightenLargerSlotSpanFreeListsMode::kOnlyWhenUnprovisioning &&
         unprovisioned_bytes);

    PA_DCHECK((unprovisioned_bytes > 0) == (truncated_slots > 0));
    size_t new_unprovisioned_slots =
        truncated_slots + slot_span->num_unprovisioned_slots;
    PA_DCHECK(new_unprovisioned_slots <= bucket->get_slots_per_span());
    slot_span->num_unprovisioned_slots = new_unprovisioned_slots;

    size_t num_new_freelist_entries = 0;
    internal::PartitionFreelistEntry* back = nullptr;
    if (straighten) {
      // Rewrite the freelist to "straighten" it. This achieves two things:
      // getting rid of unprovisioned entries, and ordering entries based on
      // how close they are to the slot span start. This reduces the chances of
      // allocating further slots, in the hope that we'll get some unused pages
      // at the end of the span that can be unprovisioned, thus reducing
      // fragmentation.
      for (size_t slot_index = 0; slot_index < num_provisioned_slots;
           ++slot_index) {
        if (slot_usage[slot_index]) {
          continue;
        }
        // Add the slot to the end of the list. The most proper thing to do
        // would be to null-terminate the new entry with:
        //   auto* entry = PartitionFreelistEntry::EmplaceAndInitNull(
        //       slot_span_start + (slot_size * slot_index));
        // But there's no need to do this, as its last-ness is likely
        // temporary, and the next iteration's back->SetNext(), or the
        // post-loop PartitionFreelistEntry::EmplaceAndInitNull(back), will
        // override it anyway.
        auto* entry = static_cast<PartitionFreelistEntry*>(
            SlotStartAddr2Ptr(slot_span_start + (slot_size * slot_index)));
        if (num_new_freelist_entries) {
          freelist_dispatcher->SetNext(back, entry);
        } else {
          slot_span->SetFreelistHead(entry);
        }
        back = entry;
        num_new_freelist_entries++;
      }
    } else if (unprovisioned_bytes) {
      // If there are any unprovisioned entries, scan the list to remove them,
      // without "straightening" it.
      uintptr_t first_unprovisioned_slot =
          slot_span_start + (num_provisioned_slots * slot_size);
      bool skipped = false;
      for (PartitionFreelistEntry* entry = slot_span->get_freelist_head();
           entry; entry = freelist_dispatcher->GetNext(entry, slot_size)) {
        uintptr_t entry_addr = SlotStartPtr2Addr(entry);
        if (entry_addr >= first_unprovisioned_slot) {
          skipped = true;
          continue;
        }
        // If the last visited entry was skipped (due to being unprovisioned),
        // update the next pointer of the last non-skipped entry (or the head
        // if no such entry exists). Otherwise the link is already correct.
        if (skipped) {
          if (num_new_freelist_entries) {
            freelist_dispatcher->SetNext(back, entry);
          } else {
            slot_span->SetFreelistHead(entry);
          }
          skipped = false;
        }
        back = entry;
        num_new_freelist_entries++;
      }
    }
    // If any of the above loops were executed, null-terminate the last entry,
    // or the head if no entry exists.
    if (straighten || unprovisioned_bytes) {
      if (num_new_freelist_entries) {
        PA_DCHECK(back);
        freelist_dispatcher->EmplaceAndInitNull(back);
#if !BUILDFLAG(IS_WIN)
        // Memorize the index of the last slot in the list, as it may be able
        // to participate in an optimization related to page discarding
        // (below), due to its next pointer being encoded as 0.
        last_slot =
            bucket->GetSlotNumber(SlotStartPtr2Addr(back) - slot_span_start);
#endif
      } else {
        PA_DCHECK(!back);
        slot_span->SetFreelistHead(nullptr);
      }
      PA_DCHECK(num_new_freelist_entries ==
                num_provisioned_slots - slot_span->num_allocated_slots);
    }

#if BUILDFLAG(USE_FREESLOT_BITMAP)
    FreeSlotBitmapReset(slot_span_start + (slot_size * num_provisioned_slots),
                        end_addr, slot_size);
#endif

    if (unprovisioned_bytes) {
      if (!kUseLazyCommit) {
        // Discard the memory.
        ScopedSyscallTimer timer{root};
        DiscardSystemPages(begin_addr, unprovisioned_bytes);
      } else {
        // See crbug.com/1431606 for details. LazyCommit depends on the
        // invariant that both used slots and unused slots (i.e. those on the
        // freelist) are committed. However, this code removes the unused slots
        // from the freelist, so if DiscardSystemPages() were used here,
        // PartitionAlloc might later re-commit system pages that are already
        // committed, which would make the committed_size and
        // max_committed_size metrics wrong. PA should use
        // DecommitSystemPagesForData() instead.
        root->DecommitSystemPagesForData(
            begin_addr, unprovisioned_bytes,
            PageAccessibilityDisposition::kAllowKeepForPerf);
      }
    }
  }

  if (slot_size < SystemPageSize()) {
    // Return here, because implementing the following steps for smaller slot
    // sizes would require complicated logic and make the code messy.
    return discardable_bytes;
  }

  // Next, walk the slots and for any not in use, consider which system pages
  // are no longer needed. We can discard any system pages back to the system as
  // long as we don't interfere with a freelist pointer or an adjacent used
  // slot. Note they'll be automatically paged back in when touched, and
  // zero-initialized (except Windows).
  for (size_t i = 0; i < num_provisioned_slots; ++i) {
    if (slot_usage[i]) {
      continue;
    }

    // The first address we can safely discard is just after the freelist
    // pointer. There's one optimization opportunity: if the freelist pointer is
    // encoded as 0, we can discard that pointer value too (except on
    // Windows).
    begin_addr = slot_span_start + (i * slot_size);
    end_addr = begin_addr + slot_size;
    bool can_discard_free_list_pointer = false;
#if !BUILDFLAG(IS_WIN)
    if (i != last_slot) {
      begin_addr += sizeof(internal::PartitionFreelistEntry);
    } else {
      can_discard_free_list_pointer = true;
    }
#else
    begin_addr += sizeof(internal::PartitionFreelistEntry);
#endif

    uintptr_t rounded_up_begin_addr = RoundUpToSystemPage(begin_addr);
    uintptr_t rounded_down_begin_addr = RoundDownToSystemPage(begin_addr);
    end_addr = RoundDownToSystemPage(end_addr);

    // |rounded_up_begin_addr| could be greater than |end_addr| only if the
    // slot size were less than the system page size, or if the freelist
    // pointer crossed a page boundary. Neither is possible here.
    PA_DCHECK(rounded_up_begin_addr <= end_addr);

    if (rounded_down_begin_addr < rounded_up_begin_addr && i != 0 &&
        !slot_usage[i - 1] && can_discard_free_list_pointer) {
      // This slot contains a partial page at its beginning. The rest of that
      // page is contained in slot[i - 1], which is also discardable.
      // Therefore we can discard this page.
      begin_addr = rounded_down_begin_addr;
    } else {
      begin_addr = rounded_up_begin_addr;
    }

    if (begin_addr < end_addr) {
      size_t partial_slot_bytes = end_addr - begin_addr;
      discardable_bytes += partial_slot_bytes;
      if (!accounting_only) {
        // Discard the pages. But don't be tempted to decommit them (as done
        // above), because here we're getting rid of provisioned pages amidst
        // used pages, and we rely on them to materialize automatically when
        // the virtual address is accessed, so the mapping needs to stay
        // intact.
        ScopedSyscallTimer timer{root};
        DiscardSystemPages(begin_addr, partial_slot_bytes);
      }
    }
  }

  return discardable_bytes;
}

static void PartitionPurgeBucket(PartitionRoot* root,
                                 internal::PartitionBucket* bucket)
    PA_EXCLUSIVE_LOCKS_REQUIRED(internal::PartitionRootLock(root)) {
  if (bucket->active_slot_spans_head !=
      internal::SlotSpanMetadata::get_sentinel_slot_span()) {
    for (internal::SlotSpanMetadata* slot_span = bucket->active_slot_spans_head;
         slot_span; slot_span = slot_span->next_slot_span) {
      PA_DCHECK(slot_span !=
                internal::SlotSpanMetadata::get_sentinel_slot_span());
      PartitionPurgeSlotSpan(root, slot_span, false);
    }
  }
}

static void PartitionDumpSlotSpanStats(PartitionBucketMemoryStats* stats_out,
                                       PartitionRoot* root,
                                       internal::SlotSpanMetadata* slot_span)
    PA_EXCLUSIVE_LOCKS_REQUIRED(internal::PartitionRootLock(root)) {
  uint16_t bucket_num_slots = slot_span->bucket->get_slots_per_span();

  if (slot_span->is_decommitted()) {
    ++stats_out->num_decommitted_slot_spans;
    return;
  }

  stats_out->discardable_bytes += PartitionPurgeSlotSpan(root, slot_span, true);

  if (slot_span->CanStoreRawSize()) {
    stats_out->active_bytes += static_cast<uint32_t>(slot_span->GetRawSize());
  } else {
    stats_out->active_bytes +=
        (slot_span->num_allocated_slots * stats_out->bucket_slot_size);
  }
  stats_out->active_count += slot_span->num_allocated_slots;

  size_t slot_span_bytes_resident = RoundUpToSystemPage(
      (bucket_num_slots - slot_span->num_unprovisioned_slots) *
      stats_out->bucket_slot_size);
  stats_out->resident_bytes += slot_span_bytes_resident;
  if (slot_span->is_empty()) {
    stats_out->decommittable_bytes += slot_span_bytes_resident;
    ++stats_out->num_empty_slot_spans;
  } else if (slot_span->is_full()) {
    ++stats_out->num_full_slot_spans;
  } else {
    PA_DCHECK(slot_span->is_active());
    ++stats_out->num_active_slot_spans;
  }
}

static void PartitionDumpBucketStats(PartitionBucketMemoryStats* stats_out,
                                     PartitionRoot* root,
                                     const internal::PartitionBucket* bucket)
    PA_EXCLUSIVE_LOCKS_REQUIRED(internal::PartitionRootLock(root)) {
  PA_DCHECK(!bucket->is_direct_mapped());
  stats_out->is_valid = false;
  // If the active slot span list is empty (==
  // internal::SlotSpanMetadata::get_sentinel_slot_span()), the bucket might
  // still need to be reported if it has a list of empty, decommitted or full
  // slot spans.
  if (bucket->active_slot_spans_head ==
          internal::SlotSpanMetadata::get_sentinel_slot_span() &&
      !bucket->empty_slot_spans_head && !bucket->decommitted_slot_spans_head &&
      !bucket->num_full_slot_spans) {
    return;
  }

  memset(stats_out, '\0', sizeof(*stats_out));
  stats_out->is_valid = true;
  stats_out->is_direct_map = false;
  stats_out->num_full_slot_spans =
      static_cast<size_t>(bucket->num_full_slot_spans);
  stats_out->bucket_slot_size = bucket->slot_size;
  uint16_t bucket_num_slots = bucket->get_slots_per_span();
  size_t bucket_useful_storage = stats_out->bucket_slot_size * bucket_num_slots;
  stats_out->allocated_slot_span_size = bucket->get_bytes_per_span();
  stats_out->active_bytes = bucket->num_full_slot_spans * bucket_useful_storage;
  stats_out->active_count = bucket->num_full_slot_spans * bucket_num_slots;
  stats_out->resident_bytes =
      bucket->num_full_slot_spans * stats_out->allocated_slot_span_size;

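  // Full slot spans are not walked here; their contribution was derived from
  // |num_full_slot_spans| above, while the empty, decommitted and active lists
  // are walked explicitly below.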
  for (internal::SlotSpanMetadata* slot_span = bucket->empty_slot_spans_head;
       slot_span; slot_span = slot_span->next_slot_span) {
    PA_DCHECK(slot_span->is_empty() || slot_span->is_decommitted());
    PartitionDumpSlotSpanStats(stats_out, root, slot_span);
  }
  for (internal::SlotSpanMetadata* slot_span =
           bucket->decommitted_slot_spans_head;
       slot_span; slot_span = slot_span->next_slot_span) {
    PA_DCHECK(slot_span->is_decommitted());
    PartitionDumpSlotSpanStats(stats_out, root, slot_span);
  }

  if (bucket->active_slot_spans_head !=
      internal::SlotSpanMetadata::get_sentinel_slot_span()) {
    for (internal::SlotSpanMetadata* slot_span = bucket->active_slot_spans_head;
         slot_span; slot_span = slot_span->next_slot_span) {
      PA_DCHECK(slot_span !=
                internal::SlotSpanMetadata::get_sentinel_slot_span());
      PartitionDumpSlotSpanStats(stats_out, root, slot_span);
    }
  }
}

#if BUILDFLAG(PA_DCHECK_IS_ON)
void DCheckIfManagedByPartitionAllocBRPPool(uintptr_t address) {
  PA_DCHECK(IsManagedByPartitionAllocBRPPool(address));
}
#endif

#if BUILDFLAG(ENABLE_THREAD_ISOLATION)
void PartitionAllocThreadIsolationInit(ThreadIsolationOption thread_isolation) {
#if BUILDFLAG(PA_DCHECK_IS_ON)
  ThreadIsolationSettings::settings.enabled = true;
#endif
  PartitionAddressSpace::InitThreadIsolatedPool(thread_isolation);
  // Call WriteProtectThreadIsolatedGlobals last since we might not have write
  // permissions to globals afterwards.
  WriteProtectThreadIsolatedGlobals(thread_isolation);
}
#endif  // BUILDFLAG(ENABLE_THREAD_ISOLATION)

}  // namespace internal

[[noreturn]] PA_NOINLINE void PartitionRoot::OutOfMemory(size_t size) {
  const size_t virtual_address_space_size =
      total_size_of_super_pages.load(std::memory_order_relaxed) +
      total_size_of_direct_mapped_pages.load(std::memory_order_relaxed);
#if !defined(ARCH_CPU_64_BITS)
  const size_t uncommitted_size =
      virtual_address_space_size -
      total_size_of_committed_pages.load(std::memory_order_relaxed);

  // Check whether this OOM is due to a lot of super pages that are allocated
  // but not committed, probably due to http://crbug.com/421387.
  if (uncommitted_size > internal::kReasonableSizeOfUnusedPages) {
    internal::PartitionOutOfMemoryWithLotsOfUncommitedPages(size);
  }

#if BUILDFLAG(IS_WIN)
  // If true then we are running on 64-bit Windows.
  BOOL is_wow_64 = FALSE;
  // Intentionally ignoring failures.
  IsWow64Process(GetCurrentProcess(), &is_wow_64);
  // 32-bit address space on Windows is typically either 2 GiB (on 32-bit
  // Windows) or 4 GiB (on 64-bit Windows). 2.8 and 1.0 GiB are just rough
  // guesses as to how much address space PA can consume (note that code,
  // stacks, and other allocators will also consume address space).
  const size_t kReasonableVirtualSize = (is_wow_64 ? 2800 : 1024) * 1024 * 1024;
  // Make it obvious whether we are running on 64-bit Windows.
  PA_DEBUG_DATA_ON_STACK("iswow64", static_cast<size_t>(is_wow_64));
#else
  constexpr size_t kReasonableVirtualSize =
      // 1.5GiB elsewhere, since address space is typically 3GiB.
      (1024 + 512) * 1024 * 1024;
#endif
  if (virtual_address_space_size > kReasonableVirtualSize) {
    internal::PartitionOutOfMemoryWithLargeVirtualSize(
        virtual_address_space_size);
  }
#endif  // #if !defined(ARCH_CPU_64_BITS)

  // Out of memory can be due to multiple causes, such as:
  // - Out of virtual address space in the desired pool
  // - Out of commit due to either our process, or another one
  // - Excessive allocations in the current process
  //
  // Saving these values makes it easier to distinguish between these. See the
  // documentation in PA_CONFIG(DEBUG_DATA_ON_STACK) on how to get these from
  // minidumps.
  PA_DEBUG_DATA_ON_STACK("va_size", virtual_address_space_size);
  PA_DEBUG_DATA_ON_STACK("alloc", get_total_size_of_allocated_bytes());
  PA_DEBUG_DATA_ON_STACK("commit", get_total_size_of_committed_pages());
  PA_DEBUG_DATA_ON_STACK("size", size);

  if (internal::g_oom_handling_function) {
    (*internal::g_oom_handling_function)(size);
  }
  OOM_CRASH(size);
}

void PartitionRoot::DecommitEmptySlotSpans() {
  ShrinkEmptySlotSpansRing(0);
  // We just decommitted everything and are holding the lock, so this should be
  // exactly 0.
  PA_DCHECK(empty_slot_spans_dirty_bytes == 0);
}

void PartitionRoot::DecommitEmptySlotSpansForTesting() {
  ::partition_alloc::internal::ScopedGuard guard{
      internal::PartitionRootLock(this)};
  DecommitEmptySlotSpans();
}

void PartitionRoot::DestructForTesting() {
  // We need to destruct the thread cache before we unreserve any of the super
  // pages below, which we currently are not doing. So, we should only call
  // this function on PartitionRoots without a thread cache.
  PA_CHECK(!settings.with_thread_cache);
  auto pool_handle = ChoosePool();
#if BUILDFLAG(ENABLE_THREAD_ISOLATION)
  // The pages managed by the thread isolated pool will be freed in
  // UninitThreadIsolatedForTesting(). Don't invoke FreePages() for them.
  if (pool_handle == internal::kThreadIsolatedPoolHandle) {
    return;
  }
  PA_DCHECK(pool_handle < internal::kNumPools);
#else
  PA_DCHECK(pool_handle <= internal::kNumPools);
#endif

  auto* curr = first_extent;
  while (curr != nullptr) {
    auto* next = curr->next;
    uintptr_t address = SuperPagesBeginFromExtent(curr);
    size_t size =
        internal::kSuperPageSize * curr->number_of_consecutive_super_pages;
#if !BUILDFLAG(HAS_64_BIT_POINTERS)
    internal::AddressPoolManager::GetInstance().MarkUnused(pool_handle, address,
                                                           size);
#endif
    internal::AddressPoolManager::GetInstance().UnreserveAndDecommit(
        pool_handle, address, size);
    curr = next;
  }
}

#if PA_CONFIG(MAYBE_ENABLE_MAC11_MALLOC_SIZE_HACK)
void PartitionRoot::InitMac11MallocSizeHackUsableSize() {
  settings.mac11_malloc_size_hack_enabled_ = true;

  // A request of 32B will fall into a 48B bucket in the presence of BRP
  // in-slot metadata, yielding |48 - in_slot_metadata_size| of actual usable
  // space.
  PA_DCHECK(settings.in_slot_metadata_size);
  settings.mac11_malloc_size_hack_usable_size_ =
      48 - settings.in_slot_metadata_size;
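  // For illustration only: with a hypothetical 4-byte in-slot metadata record,
  // the reported usable size for such requests would become 44 bytes.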
}

void PartitionRoot::EnableMac11MallocSizeHackForTesting() {
  InitMac11MallocSizeHackUsableSize();
}

void PartitionRoot::EnableMac11MallocSizeHackIfNeeded() {
  PA_DCHECK(settings.brp_enabled_);
  if (internal::base::mac::MacOSMajorVersion() == 11) {
    InitMac11MallocSizeHackUsableSize();
  }
}
#endif  // PA_CONFIG(MAYBE_ENABLE_MAC11_MALLOC_SIZE_HACK)

#if BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT) && !BUILDFLAG(HAS_64_BIT_POINTERS)
namespace {
std::atomic<bool> g_reserve_brp_guard_region_called;
// An address constructed by repeating `kQuarantinedByte` should never point
// to valid memory. Preemptively reserve a memory region around that address
// and make it inaccessible. Not needed for 64-bit platforms where the address
// is guaranteed to be non-canonical. Safe to call multiple times.
void ReserveBackupRefPtrGuardRegionIfNeeded() {
  bool expected = false;
  // No need to block execution for potential concurrent initialization; we
  // merely want to make sure this is only called once.
  if (!g_reserve_brp_guard_region_called.compare_exchange_strong(expected,
                                                                 true)) {
    return;
  }

  size_t alignment = internal::PageAllocationGranularity();
  uintptr_t requested_address;
  memset(&requested_address, internal::kQuarantinedByte,
         sizeof(requested_address));
  requested_address = RoundDownToPageAllocationGranularity(requested_address);
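  // Assuming kQuarantinedByte is 0xEF, |requested_address| now points at the
  // allocation-granularity-aligned region containing 0xEFEF...EF, e.g. around
  // 0xEFEFEFEF on a 32-bit platform (this code is only compiled when 64-bit
  // pointers are unavailable).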

  // Request several pages so that even unreasonably large C++ objects stay
  // within the inaccessible region. If some of the pages can't be reserved,
  // it's still preferable to try and reserve the rest.
  for (size_t i = 0; i < 4; ++i) {
    [[maybe_unused]] uintptr_t allocated_address =
        AllocPages(requested_address, alignment, alignment,
                   PageAccessibilityConfiguration(
                       PageAccessibilityConfiguration::kInaccessible),
                   PageTag::kPartitionAlloc);
    requested_address += alignment;
  }
}
}  // namespace
#endif  // BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT) &&
        // !BUILDFLAG(HAS_64_BIT_POINTERS)

void PartitionRoot::Init(PartitionOptions opts) {
  {
#if BUILDFLAG(IS_APPLE)
    // Needed to statically bound the page size, which is a runtime constant on
    // Apple OSes.
    PA_CHECK((internal::SystemPageSize() == (size_t{1} << 12)) ||
             (internal::SystemPageSize() == (size_t{1} << 14)));
#elif BUILDFLAG(IS_LINUX) && defined(ARCH_CPU_ARM64)
    // Check the runtime page size. Though the check is currently the same, it
    // is not merged with the IS_APPLE case above, because a 1 << 16 case will
    // need to be added here in the future to allow a 64 KiB page size. That is
    // only supported on Linux on arm64, not on IS_APPLE, but it is not present
    // here yet since the rest of PartitionAlloc does not currently support it.
    PA_CHECK((internal::SystemPageSize() == (size_t{1} << 12)) ||
             (internal::SystemPageSize() == (size_t{1} << 14)));
#endif

    ::partition_alloc::internal::ScopedGuard guard{lock_};
    if (initialized) {
      return;
    }

#if BUILDFLAG(HAS_64_BIT_POINTERS)
    // Reserve address space for PartitionAlloc.
    internal::PartitionAddressSpace::Init();
#endif

#if BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT) && !BUILDFLAG(HAS_64_BIT_POINTERS)
    ReserveBackupRefPtrGuardRegionIfNeeded();
#endif

#if BUILDFLAG(PA_DCHECK_IS_ON)
    settings.use_cookie = true;
#else
    static_assert(!Settings::use_cookie);
#endif  // BUILDFLAG(PA_DCHECK_IS_ON)
#if BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT)
    settings.brp_enabled_ = opts.backup_ref_ptr == PartitionOptions::kEnabled;
#else   // BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT)
    PA_CHECK(opts.backup_ref_ptr == PartitionOptions::kDisabled);
#endif  // BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT)
    settings.use_configurable_pool =
        (opts.use_configurable_pool == PartitionOptions::kAllowed) &&
        IsConfigurablePoolAvailable();
    PA_DCHECK(!settings.use_configurable_pool || IsConfigurablePoolAvailable());
    settings.zapping_by_free_flags =
        opts.zapping_by_free_flags == PartitionOptions::kEnabled;

    settings.scheduler_loop_quarantine =
        opts.scheduler_loop_quarantine == PartitionOptions::kEnabled;
    if (settings.scheduler_loop_quarantine) {
      scheduler_loop_quarantine_capacity_in_bytes =
          opts.scheduler_loop_quarantine_capacity_in_bytes;
      scheduler_loop_quarantine_root.SetCapacityInBytes(
          opts.scheduler_loop_quarantine_capacity_in_bytes);
      scheduler_loop_quarantine.emplace(
          scheduler_loop_quarantine_root.CreateBranch());
    } else {
      // Deleting a running quarantine is not supported.
      PA_CHECK(!scheduler_loop_quarantine.has_value());
    }

#if BUILDFLAG(HAS_MEMORY_TAGGING)
    settings.memory_tagging_enabled_ =
        opts.memory_tagging.enabled == PartitionOptions::kEnabled;
    // Memory tagging is not supported in the configurable pool because MTE
    // stores tagging information in the high bits of the pointer, which causes
    // issues with components like V8's ArrayBuffers that use custom pointer
    // representations. All custom representations encountered so far rely on
    // an "is in configurable pool?" check, so we use that as a proxy.
    PA_CHECK(!settings.memory_tagging_enabled_ ||
             !settings.use_configurable_pool);

    settings.memory_tagging_reporting_mode_ =
        opts.memory_tagging.reporting_mode;
#endif  // BUILDFLAG(HAS_MEMORY_TAGGING)

    settings.use_pool_offset_freelists =
        opts.use_pool_offset_freelists == PartitionOptions::kEnabled;

    // brp_enabled() is not supported in the configurable pool because
    // BRP requires objects to be in a different Pool.
#if BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT)
    PA_CHECK(!(settings.use_configurable_pool && brp_enabled()));
#endif

#if BUILDFLAG(ENABLE_THREAD_ISOLATION)
    // BRP and thread isolated mode use different pools, so they can't be
    // enabled at the same time.
    PA_CHECK(!opts.thread_isolation.enabled ||
             opts.backup_ref_ptr == PartitionOptions::kDisabled);
    settings.thread_isolation = opts.thread_isolation;
#endif  // BUILDFLAG(ENABLE_THREAD_ISOLATION)

#if PA_CONFIG(EXTRAS_REQUIRED)
    settings.extras_size = 0;

    if (settings.use_cookie) {
      settings.extras_size += internal::kPartitionCookieSizeAdjustment;
    }

#if BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT)
    if (brp_enabled()) {
      size_t in_slot_metadata_size =
          internal::AlignUpInSlotMetadataSizeForApple(
              internal::kInSlotMetadataSizeAdjustment);
      settings.in_slot_metadata_size = in_slot_metadata_size;
      PA_CHECK(internal::kInSlotMetadataSizeAdjustment <=
               in_slot_metadata_size);
      settings.extras_size += in_slot_metadata_size;
#if PA_CONFIG(MAYBE_ENABLE_MAC11_MALLOC_SIZE_HACK)
      EnableMac11MallocSizeHackIfNeeded();
#endif
    }
#endif  // BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT)
#endif  // PA_CONFIG(EXTRAS_REQUIRED)

    settings.quarantine_mode =
#if BUILDFLAG(USE_STARSCAN)
        (opts.star_scan_quarantine == PartitionOptions::kDisallowed
             ? QuarantineMode::kAlwaysDisabled
             : QuarantineMode::kDisabledByDefault);
#else
        QuarantineMode::kAlwaysDisabled;
#endif  // BUILDFLAG(USE_STARSCAN)

    // We mark the sentinel slot span as free to make sure it is skipped by our
    // logic to find a new active slot span.
    memset(&sentinel_bucket, 0, sizeof(sentinel_bucket));
    sentinel_bucket.active_slot_spans_head =
        SlotSpanMetadata::get_sentinel_slot_span_non_const();

    // This is a "magic" value so we can test if a root pointer is valid.
    inverted_self = ~reinterpret_cast<uintptr_t>(this);

    // Set up the actual usable buckets first.
    constexpr internal::BucketIndexLookup lookup{};
    size_t bucket_index = 0;
    while (lookup.bucket_sizes()[bucket_index] !=
           internal::kInvalidBucketSize) {
      buckets[bucket_index].Init(lookup.bucket_sizes()[bucket_index]);
      bucket_index++;
    }
    PA_DCHECK(bucket_index < internal::kNumBuckets);

    // Remaining buckets are not usable, and not real.
    for (size_t index = bucket_index; index < internal::kNumBuckets; index++) {
      // Cannot init with size 0 since it computes 1 / size, but make sure the
      // bucket is invalid.
      buckets[index].Init(internal::kInvalidBucketSize);
      buckets[index].active_slot_spans_head = nullptr;
      PA_DCHECK(!buckets[index].is_valid());
    }

#if !PA_CONFIG(THREAD_CACHE_SUPPORTED)
    // TLS in ThreadCache not supported on other OSes.
    settings.with_thread_cache = false;
#else
    ThreadCache::EnsureThreadSpecificDataInitialized();
    settings.with_thread_cache =
        (opts.thread_cache == PartitionOptions::kEnabled);

    if (settings.with_thread_cache) {
      ThreadCache::Init(this);
    }
#endif  // !PA_CONFIG(THREAD_CACHE_SUPPORTED)

#if PA_CONFIG(USE_PARTITION_ROOT_ENUMERATOR)
    internal::PartitionRootEnumerator::Instance().Register(this);
#endif

    initialized = true;
  }

  // Called without the lock, might allocate.
#if BUILDFLAG(USE_PARTITION_ALLOC_AS_MALLOC)
  PartitionAllocMallocInitOnce();
#endif

#if BUILDFLAG(ENABLE_THREAD_ISOLATION)
  if (settings.thread_isolation.enabled) {
    internal::PartitionAllocThreadIsolationInit(settings.thread_isolation);
  }
#endif
}

PartitionRoot::Settings::Settings() = default;

PartitionRoot::PartitionRoot() : scheduler_loop_quarantine_root(*this) {}

PartitionRoot::PartitionRoot(PartitionOptions opts)
    : scheduler_loop_quarantine_root(*this) {
  Init(opts);
}

PartitionRoot::~PartitionRoot() {
#if BUILDFLAG(USE_PARTITION_ALLOC_AS_MALLOC)
  PA_CHECK(!settings.with_thread_cache)
      << "Must not destroy a partition with a thread cache";
#endif  // BUILDFLAG(USE_PARTITION_ALLOC_AS_MALLOC)

#if PA_CONFIG(USE_PARTITION_ROOT_ENUMERATOR)
  if (initialized) {
    internal::PartitionRootEnumerator::Instance().Unregister(this);
  }
#endif  // PA_CONFIG(USE_PARTITION_ROOT_ENUMERATOR)
}

void PartitionRoot::EnableThreadCacheIfSupported() {
#if PA_CONFIG(THREAD_CACHE_SUPPORTED)
  ::partition_alloc::internal::ScopedGuard guard{lock_};
  PA_CHECK(!settings.with_thread_cache);
  // By the time we get here, there may be multiple threads created in the
  // process. Since `with_thread_cache` is accessed without a lock, it can
  // become visible to another thread before the effects of
  // `internal::ThreadCacheInit()` are visible. To prevent that, we fake thread
  // cache creation being in-progress while this is running.
  //
  // This synchronizes with the acquire load in `MaybeInitThreadCacheAndAlloc()`
  // to ensure that we don't create (and thus use) a ThreadCache before
  // ThreadCache::Init()'s effects are visible.
  int before =
      thread_caches_being_constructed_.fetch_add(1, std::memory_order_acquire);
  PA_CHECK(before == 0);
  ThreadCache::Init(this);
  thread_caches_being_constructed_.fetch_sub(1, std::memory_order_release);
  settings.with_thread_cache = true;
#endif  // PA_CONFIG(THREAD_CACHE_SUPPORTED)
}

bool PartitionRoot::TryReallocInPlaceForDirectMap(
    internal::SlotSpanMetadata* slot_span,
    size_t requested_size) {
  PA_DCHECK(slot_span->bucket->is_direct_mapped());
  // Slot-span metadata isn't MTE-tagged.
  PA_DCHECK(
      internal::IsManagedByDirectMap(reinterpret_cast<uintptr_t>(slot_span)));

  size_t raw_size = AdjustSizeForExtrasAdd(requested_size);
  auto* extent = DirectMapExtent::FromSlotSpanMetadata(slot_span);
  size_t current_reservation_size = extent->reservation_size;
  // Calculate the new reservation size the way PartitionDirectMap() would, but
  // skip the alignment, because this call isn't requesting it.
  size_t new_reservation_size = GetDirectMapReservationSize(raw_size);

  // If the new reservation would be larger, there is nothing we can do to
  // reallocate in-place.
  if (new_reservation_size > current_reservation_size) {
    return false;
  }

  // Don't reallocate in-place if the new reservation size would be less than
  // 80% of the current one, to avoid holding on to too much unused address
  // space. Make this check before comparing slot sizes, as even with equal or
  // similar slot sizes we can save a lot if the original allocation was
  // heavily padded for alignment.
  if ((new_reservation_size >> internal::SystemPageShift()) * 5 <
      (current_reservation_size >> internal::SystemPageShift()) * 4) {
    return false;
  }
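  // (The comparison above performs the 80% check in whole system pages:
  // new_pages * 5 < current_pages * 4 is equivalent to
  // new_pages < 0.8 * current_pages.)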

  // Note that the new size isn't a bucketed size; this function is called
  // whenever we're reallocating a direct mapped allocation, so calculate it
  // the way PartitionDirectMap() would.
  size_t new_slot_size = GetDirectMapSlotSize(raw_size);
  if (new_slot_size < internal::kMinDirectMappedDownsize) {
    return false;
  }

  // Past this point, we decided we'll attempt to reallocate without relocating,
  // so we have to honor the padding for alignment in front of the original
  // allocation, even though this function isn't requesting any alignment.

  // bucket->slot_size is the currently committed size of the allocation.
  size_t current_slot_size = slot_span->bucket->slot_size;
  size_t current_usable_size = GetSlotUsableSize(slot_span);
  uintptr_t slot_start = SlotSpanMetadata::ToSlotSpanStart(slot_span);
  // This is the available part of the reservation up to which the new
  // allocation can grow.
  size_t available_reservation_size =
      current_reservation_size - extent->padding_for_alignment -
      PartitionRoot::GetDirectMapMetadataAndGuardPagesSize();
#if BUILDFLAG(PA_DCHECK_IS_ON)
  uintptr_t reservation_start = slot_start & internal::kSuperPageBaseMask;
  PA_DCHECK(internal::IsReservationStart(reservation_start));
  PA_DCHECK(slot_start + available_reservation_size ==
            reservation_start + current_reservation_size -
                GetDirectMapMetadataAndGuardPagesSize() +
                internal::PartitionPageSize());
#endif  // BUILDFLAG(PA_DCHECK_IS_ON)

  PA_DCHECK(new_slot_size > internal::kMaxMemoryTaggingSize);
  if (new_slot_size == current_slot_size) {
    // No need to move any memory around, but update size and cookie below.
    // That's because raw_size may have changed.
  } else if (new_slot_size < current_slot_size) {
    // Shrink by decommitting unneeded pages and making them inaccessible.
    size_t decommit_size = current_slot_size - new_slot_size;
    DecommitSystemPagesForData(slot_start + new_slot_size, decommit_size,
                               PageAccessibilityDisposition::kRequireUpdate);
    // Since the decommitted system pages are still reserved, we don't need to
    // change the entries for decommitted pages in the reservation offset table.
1252   } else if (new_slot_size <= available_reservation_size) {
1253     // Grow within the actually reserved address space. Just need to make sure
1254     // the pages are accessible.
1255     size_t recommit_slot_size_growth = new_slot_size - current_slot_size;
1256     // Direct map never uses tagging, as size is always >kMaxMemoryTaggingSize.
1257     RecommitSystemPagesForData(
1258         slot_start + current_slot_size, recommit_slot_size_growth,
1259         PageAccessibilityDisposition::kRequireUpdate, false);
1260     // The recommitted system pages were already reserved, and all the
1261     // entries in the reservation offset table (for the entire reservation_size
1262     // region) have already been initialized.
1263 
1264 #if BUILDFLAG(PA_DCHECK_IS_ON)
1265     memset(reinterpret_cast<void*>(slot_start + current_slot_size),
1266            internal::kUninitializedByte, recommit_slot_size_growth);
1267 #endif
1268   } else {
1269     // We can't perform the realloc in-place.
1270     // TODO: support this too when possible.
1271     return false;
1272   }
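  // For instance (sizes rounded for illustration): shrinking a 2 MiB slot to
  // 1.2 MiB decommits the trailing 0.8 MiB but keeps it reserved, so a later
  // grow back to 2 MiB only needs the RecommitSystemPagesForData() path
  // above; the reservation offset table is untouched in both directions.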
1273 
1274   DecreaseTotalSizeOfAllocatedBytes(reinterpret_cast<uintptr_t>(slot_span),
1275                                     slot_span->bucket->slot_size);
1276   slot_span->SetRawSize(raw_size);
1277   slot_span->bucket->slot_size = new_slot_size;
1278   IncreaseTotalSizeOfAllocatedBytes(reinterpret_cast<uintptr_t>(slot_span),
1279                                     slot_span->bucket->slot_size, raw_size);
1280 
1281   // Always record an in-place realloc() as a free()+malloc() pair.
1282   //
1283   // The early returns above (`return false`) will fall back to free()+malloc(),
1284   // so this is consistent.
1285   auto* thread_cache = GetOrCreateThreadCache();
1286   if (ThreadCache::IsValid(thread_cache)) {
1287     thread_cache->RecordDeallocation(current_usable_size);
1288     thread_cache->RecordAllocation(GetSlotUsableSize(slot_span));
1289   }
1290 
1291   // Write a new trailing cookie.
1292   if (settings.use_cookie) {
1293     auto* object = static_cast<unsigned char*>(SlotStartToObject(slot_start));
1294     internal::PartitionCookieWriteValue(object + GetSlotUsableSize(slot_span));
1295   }
1296 
1297   return true;
1298 }
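// Caller-side sketch (hypothetical sizes, using the Alloc/Realloc
// instantiations exported at the bottom of this file): a direct-mapped
// allocation that shrinks and then grows again within its original
// reservation is expected to stay in place, e.g.
//   void* p = root->Alloc<AllocFlags::kNone>(4 * 1024 * 1024, "");
//   p = root->Realloc<AllocFlags::kNone, FreeFlags::kNone>(p, 3800 * 1024, "");
//   p = root->Realloc<AllocFlags::kNone, FreeFlags::kNone>(p, 4 * 1024 * 1024, "");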
1299 
1300 bool PartitionRoot::TryReallocInPlaceForNormalBuckets(
1301     void* object,
1302     SlotSpanMetadata* slot_span,
1303     size_t new_size) {
1304   uintptr_t slot_start = ObjectToSlotStart(object);
1305   PA_DCHECK(internal::IsManagedByNormalBuckets(slot_start));
1306 
1307   // TODO: note that tcmalloc will "ignore" a downsizing realloc() unless the
1308   // new size is a significant percentage smaller. We could do the same if we
1309   // determine it is a win.
1310   if (AllocationCapacityFromRequestedSize(new_size) !=
1311       AllocationCapacityFromSlotStart(slot_start)) {
1312     return false;
1313   }
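  // Illustrative example (slot sizes depend on the build configuration): in a
  // configuration with 32-byte slots and no extras, requests of 30 and 32
  // bytes share the same capacity and proceed in place, whereas growing to,
  // say, 40 bytes maps to a different bucket and takes the early return
  // above, letting the caller relocate.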
1314   size_t current_usable_size = GetSlotUsableSize(slot_span);
1315 
1316   // Trying to allocate |new_size| would use the same amount of underlying
1317   // memory as we're already using, so re-use the allocation after updating
1318   // statistics (and cookie, if present).
1319   if (slot_span->CanStoreRawSize()) {
1320 #if BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT) && BUILDFLAG(PA_DCHECK_IS_ON)
1321     internal::InSlotMetadata* old_ref_count = nullptr;
1322     if (PA_LIKELY(brp_enabled())) {
1323       old_ref_count = InSlotMetadataPointerFromSlotStartAndSize(
1324           slot_start, slot_span->bucket->slot_size);
1325     }
1326 #endif  // BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT) &&
1327         // BUILDFLAG(PA_DCHECK_IS_ON)
1328     size_t new_raw_size = AdjustSizeForExtrasAdd(new_size);
1329     slot_span->SetRawSize(new_raw_size);
1330 #if BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT) && BUILDFLAG(PA_DCHECK_IS_ON)
1331     if (PA_LIKELY(brp_enabled())) {
1332       internal::InSlotMetadata* new_ref_count =
1333           InSlotMetadataPointerFromSlotStartAndSize(
1334               slot_start, slot_span->bucket->slot_size);
1335       PA_DCHECK(new_ref_count == old_ref_count);
1336     }
1337 #endif  // BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT) &&
1338         // BUILDFLAG(PA_DCHECK_IS_ON)
1339     // Write a new trailing cookie only when it is possible to keep track of
1340     // the raw size (otherwise we wouldn't know where to look for it later).
1341     if (settings.use_cookie) {
1342       internal::PartitionCookieWriteValue(static_cast<unsigned char*>(object) +
1343                                           GetSlotUsableSize(slot_span));
1344     }
1345   }
1346 
1347   // Always record a realloc() as a free() + malloc(), even if it's in
1348   // place. When we cannot do it in place (`return false` above), the allocator
1349   // falls back to free()+malloc(), so this is consistent.
1350   ThreadCache* thread_cache = GetOrCreateThreadCache();
1351   if (PA_LIKELY(ThreadCache::IsValid(thread_cache))) {
1352     thread_cache->RecordDeallocation(current_usable_size);
1353     thread_cache->RecordAllocation(GetSlotUsableSize(slot_span));
1354   }
1355 
1356   return true;
1357 }
1358 
1359 void PartitionRoot::PurgeMemory(int flags) {
1360   {
1361     ::partition_alloc::internal::ScopedGuard guard{
1362         internal::PartitionRootLock(this)};
1363 #if BUILDFLAG(USE_STARSCAN)
1364     // Avoid purging if there is a PCScan task currently scheduled. Since PCScan
1365     // takes a snapshot of all allocated pages, decommitting pages here (even
1366     // under the lock) is racy.
1367     // TODO(bikineev): Consider rescheduling the purging after PCScan.
1368     if (PCScan::IsInProgress()) {
1369       return;
1370     }
1371 #endif  // BUILDFLAG(USE_STARSCAN)
1372 
1373     if (flags & PurgeFlags::kDecommitEmptySlotSpans) {
1374       DecommitEmptySlotSpans();
1375     }
1376     if (flags & PurgeFlags::kDiscardUnusedSystemPages) {
1377       for (Bucket& bucket : buckets) {
1378         if (bucket.slot_size == internal::kInvalidBucketSize) {
1379           continue;
1380         }
1381 
1382         if (bucket.slot_size >= internal::MinPurgeableSlotSize()) {
1383           internal::PartitionPurgeBucket(this, &bucket);
1384         } else {
1385           if (sort_smaller_slot_span_free_lists_) {
1386             bucket.SortSmallerSlotSpanFreeLists();
1387           }
1388         }
1389 
1390         // Do it at the end, as the actions above change the status of slot
1391         // spans (e.g. empty -> decommitted).
1392         bucket.MaintainActiveList();
1393 
1394         if (sort_active_slot_spans_) {
1395           bucket.SortActiveSlotSpans();
1396         }
1397       }
1398     }
1399   }
1400 }
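// For reference, callers typically combine both flags, e.g. (sketch only):
//   root->PurgeMemory(PurgeFlags::kDecommitEmptySlotSpans |
//                     PurgeFlags::kDiscardUnusedSystemPages);
// kDecommitEmptySlotSpans decommits fully empty slot spans, while
// kDiscardUnusedSystemPages additionally purges buckets at or above
// MinPurgeableSlotSize(), as done above.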
1401 
1402 void PartitionRoot::ShrinkEmptySlotSpansRing(size_t limit) {
1403   int16_t index = global_empty_slot_span_ring_index;
1404   int16_t starting_index = index;
1405   while (empty_slot_spans_dirty_bytes > limit) {
1406     SlotSpanMetadata* slot_span = global_empty_slot_span_ring[index];
1407     // The ring is not always full; entries may be nullptr.
1408     if (slot_span) {
1409       slot_span->DecommitIfPossible(this);
1410       // DecommitIfPossible() should have set this ring entry to nullptr.
1411       PA_DCHECK(!global_empty_slot_span_ring[index]);
1412     }
1413     index += 1;
1414     // Walk through all the possible slots, even though the last ones are
1415     // unused when global_empty_slot_span_ring_size is smaller than
1416     // kMaxFreeableSpans. It's simpler, and costs nothing, since all the extra
1417     // pointers are nullptr.
1418     if (index == internal::kMaxFreeableSpans) {
1419       index = 0;
1420     }
1421 
1422     // We went around the whole ring; since this is locked,
1423     // empty_slot_spans_dirty_bytes should be exactly 0.
1424     if (index == starting_index) {
1425       PA_DCHECK(empty_slot_spans_dirty_bytes == 0);
1426       // Metrics issue, don't crash, return.
1427       break;
1428     }
1429   }
1430 }
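// Example of the walk above: starting at, say, entry 14 of a 16-entry ring,
// non-null entries 14, 15, 0, 1, ... are decommitted in order, stopping as
// soon as empty_slot_spans_dirty_bytes drops to `limit` or below, or once the
// walk is back at entry 14.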
1431 
1432 void PartitionRoot::DumpStats(const char* partition_name,
1433                               bool is_light_dump,
1434                               PartitionStatsDumper* dumper) {
1435   static const size_t kMaxReportableDirectMaps = 4096;
1436   // Allocate on the heap rather than on the stack to avoid stack overflow
1437   // skirmishes (on Windows, in particular). Allocate before locking below,
1438   // otherwise when PartitionAlloc is malloc() we get reentrancy issues. This
1439   // inflates reported values a bit for detailed dumps though, by 16kiB.
1440   std::unique_ptr<uint32_t[]> direct_map_lengths;
1441   if (!is_light_dump) {
1442     direct_map_lengths =
1443         std::unique_ptr<uint32_t[]>(new uint32_t[kMaxReportableDirectMaps]);
1444   }
1445   PartitionBucketMemoryStats bucket_stats[internal::kNumBuckets];
1446   size_t num_direct_mapped_allocations = 0;
1447   PartitionMemoryStats stats = {};
1448 
1449   stats.syscall_count = syscall_count.load(std::memory_order_relaxed);
1450   stats.syscall_total_time_ns =
1451       syscall_total_time_ns.load(std::memory_order_relaxed);
1452 
1453   // Collect data with the lock held; we cannot allocate or call third-party code
1454   // below.
1455   {
1456     ::partition_alloc::internal::ScopedGuard guard{
1457         internal::PartitionRootLock(this)};
1458     PA_DCHECK(total_size_of_allocated_bytes <= max_size_of_allocated_bytes);
1459 
1460     stats.total_mmapped_bytes =
1461         total_size_of_super_pages.load(std::memory_order_relaxed) +
1462         total_size_of_direct_mapped_pages.load(std::memory_order_relaxed);
1463     stats.total_committed_bytes =
1464         total_size_of_committed_pages.load(std::memory_order_relaxed);
1465     stats.max_committed_bytes =
1466         max_size_of_committed_pages.load(std::memory_order_relaxed);
1467     stats.total_allocated_bytes = total_size_of_allocated_bytes;
1468     stats.max_allocated_bytes = max_size_of_allocated_bytes;
1469 #if BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT)
1470     stats.total_brp_quarantined_bytes =
1471         total_size_of_brp_quarantined_bytes.load(std::memory_order_relaxed);
1472     stats.total_brp_quarantined_count =
1473         total_count_of_brp_quarantined_slots.load(std::memory_order_relaxed);
1474     stats.cumulative_brp_quarantined_bytes =
1475         cumulative_size_of_brp_quarantined_bytes.load(
1476             std::memory_order_relaxed);
1477     stats.cumulative_brp_quarantined_count =
1478         cumulative_count_of_brp_quarantined_slots.load(
1479             std::memory_order_relaxed);
1480 #endif
1481 
1482     size_t direct_mapped_allocations_total_size = 0;
1483     for (size_t i = 0; i < internal::kNumBuckets; ++i) {
1484       const Bucket* bucket = &bucket_at(i);
1485       // Don't report the pseudo buckets that the generic allocator sets up in
1486       // order to preserve a fast size->bucket map (see
1487       // PartitionRoot::Init() for details).
1488       if (!bucket->is_valid()) {
1489         bucket_stats[i].is_valid = false;
1490       } else {
1491         internal::PartitionDumpBucketStats(&bucket_stats[i], this, bucket);
1492       }
1493       if (bucket_stats[i].is_valid) {
1494         stats.total_resident_bytes += bucket_stats[i].resident_bytes;
1495         stats.total_active_bytes += bucket_stats[i].active_bytes;
1496         stats.total_active_count += bucket_stats[i].active_count;
1497         stats.total_decommittable_bytes += bucket_stats[i].decommittable_bytes;
1498         stats.total_discardable_bytes += bucket_stats[i].discardable_bytes;
1499       }
1500     }
1501 
1502     for (DirectMapExtent* extent = direct_map_list;
1503          extent && num_direct_mapped_allocations < kMaxReportableDirectMaps;
1504          extent = extent->next_extent, ++num_direct_mapped_allocations) {
1505       PA_DCHECK(!extent->next_extent ||
1506                 extent->next_extent->prev_extent == extent);
1507       size_t slot_size = extent->bucket->slot_size;
1508       direct_mapped_allocations_total_size += slot_size;
1509       if (is_light_dump) {
1510         continue;
1511       }
1512       direct_map_lengths[num_direct_mapped_allocations] = slot_size;
1513     }
1514 
1515     stats.total_resident_bytes += direct_mapped_allocations_total_size;
1516     stats.total_active_bytes += direct_mapped_allocations_total_size;
1517     stats.total_active_count += num_direct_mapped_allocations;
1518 
1519     stats.has_thread_cache = settings.with_thread_cache;
1520     if (stats.has_thread_cache) {
1521       ThreadCacheRegistry::Instance().DumpStats(
1522           true, &stats.current_thread_cache_stats);
1523       ThreadCacheRegistry::Instance().DumpStats(false,
1524                                                 &stats.all_thread_caches_stats);
1525     }
1526 
1527     stats.has_scheduler_loop_quarantine = settings.scheduler_loop_quarantine;
1528     if (stats.has_scheduler_loop_quarantine) {
1529       memset(
1530           reinterpret_cast<void*>(&stats.scheduler_loop_quarantine_stats_total),
1531           0, sizeof(LightweightQuarantineStats));
1532       scheduler_loop_quarantine_root.AccumulateStats(
1533           stats.scheduler_loop_quarantine_stats_total);
1534     }
1535   }
1536 
1537   // Do not hold the lock when calling |dumper|, as it may allocate.
1538   if (!is_light_dump) {
1539     for (auto& stat : bucket_stats) {
1540       if (stat.is_valid) {
1541         dumper->PartitionsDumpBucketStats(partition_name, &stat);
1542       }
1543     }
1544 
1545     for (size_t i = 0; i < num_direct_mapped_allocations; ++i) {
1546       uint32_t size = direct_map_lengths[i];
1547 
1548       PartitionBucketMemoryStats mapped_stats = {};
1549       mapped_stats.is_valid = true;
1550       mapped_stats.is_direct_map = true;
1551       mapped_stats.num_full_slot_spans = 1;
1552       mapped_stats.allocated_slot_span_size = size;
1553       mapped_stats.bucket_slot_size = size;
1554       mapped_stats.active_bytes = size;
1555       mapped_stats.active_count = 1;
1556       mapped_stats.resident_bytes = size;
1557       dumper->PartitionsDumpBucketStats(partition_name, &mapped_stats);
1558     }
1559   }
1560   dumper->PartitionDumpTotals(partition_name, &stats);
1561 }
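// A minimal PartitionStatsDumper is enough to consume the callbacks above; a
// hypothetical sketch (class and member names invented for illustration):
//   class TotalsOnlyDumper : public PartitionStatsDumper {
//    public:
//     void PartitionDumpTotals(const char* /*partition_name*/,
//                              const PartitionMemoryStats* stats) override {
//       committed_bytes_ = stats->total_committed_bytes;
//     }
//     void PartitionsDumpBucketStats(const char* /*partition_name*/,
//                                    const PartitionBucketMemoryStats*)
//         override {}
//     size_t committed_bytes_ = 0;
//   };
// used e.g. as root->DumpStats("partition", /*is_light_dump=*/true, &dumper).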
1562 
1563 // static
1564 void PartitionRoot::DeleteForTesting(PartitionRoot* partition_root) {
1565   if (partition_root->settings.with_thread_cache) {
1566     ThreadCache::SwapForTesting(nullptr);
1567     partition_root->settings.with_thread_cache = false;
1568   }
1569 
1570   partition_root->DestructForTesting();  // IN-TEST
1571 
1572   delete partition_root;
1573 }
1574 
1575 void PartitionRoot::ResetForTesting(bool allow_leaks) {
1576   if (settings.with_thread_cache) {
1577     ThreadCache::SwapForTesting(nullptr);
1578     settings.with_thread_cache = false;
1579   }
1580 
1581   ::partition_alloc::internal::ScopedGuard guard{
1582       internal::PartitionRootLock(this)};
1583 
1584 #if BUILDFLAG(PA_DCHECK_IS_ON)
1585   if (!allow_leaks) {
1586     unsigned num_allocated_slots = 0;
1587     for (Bucket& bucket : buckets) {
1588       if (bucket.active_slot_spans_head !=
1589           internal::SlotSpanMetadata::get_sentinel_slot_span()) {
1590         for (internal::SlotSpanMetadata* slot_span =
1591                  bucket.active_slot_spans_head;
1592              slot_span; slot_span = slot_span->next_slot_span) {
1593           num_allocated_slots += slot_span->num_allocated_slots;
1594         }
1595       }
1596       // Full slot spans are not tracked in any list, so use
1597       // bucket.num_full_slot_spans to count the slots they contain.
1598       if (bucket.num_full_slot_spans) {
1599         num_allocated_slots +=
1600             bucket.num_full_slot_spans * bucket.get_slots_per_span();
1601       }
1602     }
1603     PA_DCHECK(num_allocated_slots == 0);
1604 
1605     // Check for direct-mapped allocations.
1606     PA_DCHECK(!direct_map_list);
1607   }
1608 #endif
1609 
1610   DestructForTesting();  // IN-TEST
1611 
1612 #if PA_CONFIG(USE_PARTITION_ROOT_ENUMERATOR)
1613   if (initialized) {
1614     internal::PartitionRootEnumerator::Instance().Unregister(this);
1615   }
1616 #endif  // PA_CONFIG(USE_PARTITION_ROOT_ENUMERATOR)
1617 
1618   for (Bucket& bucket : buckets) {
1619     bucket.active_slot_spans_head =
1620         SlotSpanMetadata::get_sentinel_slot_span_non_const();
1621     bucket.empty_slot_spans_head = nullptr;
1622     bucket.decommitted_slot_spans_head = nullptr;
1623     bucket.num_full_slot_spans = 0;
1624   }
1625 
1626   next_super_page = 0;
1627   next_partition_page = 0;
1628   next_partition_page_end = 0;
1629   current_extent = nullptr;
1630   first_extent = nullptr;
1631 
1632   direct_map_list = nullptr;
1633   for (auto*& entity : global_empty_slot_span_ring) {
1634     entity = nullptr;
1635   }
1636 
1637   global_empty_slot_span_ring_index = 0;
1638   global_empty_slot_span_ring_size = internal::kDefaultEmptySlotSpanRingSize;
1639   initialized = false;
1640 }
1641 
1642 void PartitionRoot::ResetBookkeepingForTesting() {
1643   ::partition_alloc::internal::ScopedGuard guard{
1644       internal::PartitionRootLock(this)};
1645   max_size_of_allocated_bytes = total_size_of_allocated_bytes;
1646   max_size_of_committed_pages.store(total_size_of_committed_pages);
1647 }
1648 
1649 void PartitionRoot::SetGlobalEmptySlotSpanRingIndexForTesting(int16_t index) {
1650   ::partition_alloc::internal::ScopedGuard guard{
1651       internal::PartitionRootLock(this)};
1652   global_empty_slot_span_ring_index = index;
1653 }
1654 
1655 ThreadCache* PartitionRoot::MaybeInitThreadCache() {
1656   auto* tcache = ThreadCache::Get();
1657   // See comment in `EnableThreadCacheIfSupported()` for why this is an acquire
1658   // load.
1659   if (ThreadCache::IsTombstone(tcache) ||
1660       thread_caches_being_constructed_.load(std::memory_order_acquire)) {
1661     // Two cases:
1662     // 1. Thread is being terminated, don't try to use the thread cache, and
1663     //    don't try to resurrect it.
1664     // 2. Someone, somewhere is currently allocating a thread cache. This may
1665     //    be us, in which case we are re-entering and should not create a thread
1666     //    cache. If it is not us, then this merely delays thread cache
1667     //    construction a bit, which is not an issue.
1668     return nullptr;
1669   }
1670 
1671   // There is no per-thread ThreadCache allocated here yet, and this partition
1672   // has a thread cache; allocate a new one.
1673   //
1674   // The thread cache allocation itself will not reenter here, as it sidesteps
1675   // the thread cache by using placement new and |RawAlloc()|. However,
1676   // internally to libc, allocations may happen to create a new TLS
1677   // variable. This would end up here again, which is not what we want (and
1678   // likely is not supported by libc).
1679   //
1680   // To avoid this sort of reentrancy, increase the count of thread caches
1681   // currently being constructed.
1682   //
1683   // Note that there is no deadlock or data inconsistency concern, since we do
1684   // not hold the lock, and as such haven't touched any internal data.
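  // Concretely: if ThreadCache::Create() below triggers a libc-internal
  // allocation (e.g. for a new TLS variable) that routes back into this
  // partition, the nested call sees the incremented counter, returns nullptr
  // from this function, and is served without a thread cache instead of
  // recursing.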
1685   int before =
1686       thread_caches_being_constructed_.fetch_add(1, std::memory_order_relaxed);
1687   PA_CHECK(before < std::numeric_limits<int>::max());
1688   tcache = ThreadCache::Create(this);
1689   thread_caches_being_constructed_.fetch_sub(1, std::memory_order_relaxed);
1690 
1691   return tcache;
1692 }
1693 
1694 internal::LightweightQuarantineBranch
1695 PartitionRoot::CreateSchedulerLoopQuarantineBranch(bool lock_required) {
1696   return scheduler_loop_quarantine_root.CreateBranch(lock_required);
1697 }
1698 
1699 // static
1700 void PartitionRoot::SetStraightenLargerSlotSpanFreeListsMode(
1701     StraightenLargerSlotSpanFreeListsMode new_value) {
1702   straighten_larger_slot_span_free_lists_ = new_value;
1703 }
1704 
1705 // static
1706 void PartitionRoot::SetSortSmallerSlotSpanFreeListsEnabled(bool new_value) {
1707   sort_smaller_slot_span_free_lists_ = new_value;
1708 }
1709 
1710 // static
1711 void PartitionRoot::SetSortActiveSlotSpansEnabled(bool new_value) {
1712   sort_active_slot_spans_ = new_value;
1713 }
1714 
1715 #if BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT)
1716 PA_NOINLINE void PartitionRoot::QuarantineForBrp(
1717     const SlotSpanMetadata* slot_span,
1718     void* object) {
1719   auto usable_size = GetSlotUsableSize(slot_span);
1720   auto hook = PartitionAllocHooks::GetQuarantineOverrideHook();
1721   if (PA_UNLIKELY(hook)) {
1722     hook(object, usable_size);
1723   } else {
1724     internal::SecureMemset(object, internal::kQuarantinedByte, usable_size);
1725   }
1726 }
1727 #endif  // BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT)
1728 
1729 // Explicitly define common template instantiations to reduce compile time.
1730 #define EXPORT_TEMPLATE \
1731   template PA_EXPORT_TEMPLATE_DEFINE(PA_COMPONENT_EXPORT(PARTITION_ALLOC))
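// Each EXPORT_TEMPLATE use below therefore expands to an explicit
// instantiation definition carrying the component export annotation, e.g.:
//   template PA_EXPORT_TEMPLATE_DEFINE(PA_COMPONENT_EXPORT(PARTITION_ALLOC))
//   void* PartitionRoot::Alloc<AllocFlags::kNone>(size_t, const char*);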
1732 EXPORT_TEMPLATE void* PartitionRoot::Alloc<AllocFlags::kNone>(size_t,
1733                                                               const char*);
1734 EXPORT_TEMPLATE void* PartitionRoot::Alloc<AllocFlags::kReturnNull>(
1735     size_t,
1736     const char*);
1737 EXPORT_TEMPLATE void*
1738 PartitionRoot::Realloc<AllocFlags::kNone, FreeFlags::kNone>(void*,
1739                                                             size_t,
1740                                                             const char*);
1741 EXPORT_TEMPLATE void*
1742 PartitionRoot::Realloc<AllocFlags::kReturnNull, FreeFlags::kNone>(void*,
1743                                                                   size_t,
1744                                                                   const char*);
1745 EXPORT_TEMPLATE void* PartitionRoot::AlignedAlloc<AllocFlags::kNone>(size_t,
1746                                                                      size_t);
1747 #undef EXPORT_TEMPLATE
1748 
1749 // TODO(https://crbug.com/1500662) Stop ignoring the -Winvalid-offsetof warning.
1750 #if defined(__clang__)
1751 #pragma clang diagnostic push
1752 #pragma clang diagnostic ignored "-Winvalid-offsetof"
1753 #endif
1754 static_assert(offsetof(PartitionRoot, sentinel_bucket) ==
1755                   offsetof(PartitionRoot, buckets) +
1756                       internal::kNumBuckets * sizeof(PartitionRoot::Bucket),
1757               "sentinel_bucket must be just after the regular buckets.");
1758 
1759 static_assert(
1760     offsetof(PartitionRoot, lock_) >= 64,
1761     "The lock should not be on the same cacheline as the read-mostly flags");
1762 #if defined(__clang__)
1763 #pragma clang diagnostic pop
1764 #endif
1765 
1766 }  // namespace partition_alloc
1767