xref: /aosp_15_r20/external/skia/src/text/gpu/SubRunAllocator.h (revision c8dee2aa9b3f27cf6c858bd81872bdeb2c07ed17)
/*
 * Copyright 2021 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef sktext_gpu_SubRunAllocator_DEFINED
#define sktext_gpu_SubRunAllocator_DEFINED

#include "include/core/SkSpan.h"
#include "include/private/base/SkAssert.h"
#include "include/private/base/SkMath.h"
#include "include/private/base/SkTLogic.h"
#include "include/private/base/SkTemplates.h"
#include "include/private/base/SkTo.h"
#include "src/base/SkArenaAlloc.h"

#include <algorithm>
#include <array>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <limits>
#include <memory>
#include <new>
#include <tuple>
#include <utility>

namespace sktext::gpu {

33 // BagOfBytes parcels out bytes with a given size and alignment.
34 class BagOfBytes {
35 public:
36     BagOfBytes(char* block, size_t blockSize, size_t firstHeapAllocation);
37     explicit BagOfBytes(size_t firstHeapAllocation = 0);
38     BagOfBytes(const BagOfBytes&) = delete;
39     BagOfBytes& operator=(const BagOfBytes&) = delete;
BagOfBytes(BagOfBytes && that)40     BagOfBytes(BagOfBytes&& that)
41             : fEndByte{std::exchange(that.fEndByte, nullptr)}
42             , fCapacity{that.fCapacity}
43             , fFibProgression{that.fFibProgression} {}
44     BagOfBytes& operator=(BagOfBytes&& that) {
45         this->~BagOfBytes();
new(this)46         new (this) BagOfBytes{std::move(that)};
47         return *this;
48     }
49 
50     ~BagOfBytes();
51 
52     // Given a requestedSize round up to the smallest size that accounts for all the per block
53     // overhead and alignment. It crashes if requestedSize is negative or too big.
PlatformMinimumSizeWithOverhead(int requestedSize,int assumedAlignment)54     static constexpr int PlatformMinimumSizeWithOverhead(int requestedSize, int assumedAlignment) {
55         return MinimumSizeWithOverhead(
56                 requestedSize, assumedAlignment, sizeof(Block), kMaxAlignment);
57     }
58 
MinimumSizeWithOverhead(int requestedSize,int assumedAlignment,int blockSize,int maxAlignment)59     static constexpr int MinimumSizeWithOverhead(
60             int requestedSize, int assumedAlignment, int blockSize, int maxAlignment) {
61         SkASSERT_RELEASE(0 <= requestedSize && requestedSize < kMaxByteSize);
62         SkASSERT_RELEASE(SkIsPow2(assumedAlignment) && SkIsPow2(maxAlignment));
63 
64         const int minAlignment = std::min(maxAlignment, assumedAlignment);
65         // There are two cases, one easy and one subtle. The easy case is when minAlignment ==
66         // maxAlignment. When that happens, the term maxAlignment - minAlignment is zero, and the
67         // block will be placed at the proper alignment because alignUp is properly
68         // aligned.
69         // The subtle case is where minAlignment < maxAlignment. Because
70         // minAlignment < maxAlignment, alignUp(requestedSize, minAlignment) + blockSize does not
71         // guarantee that block can be placed at a maxAlignment address. Block can be placed at
72         // maxAlignment/minAlignment different address to achieve alignment, so we need
73         // to add memory to allow the block to be placed on a maxAlignment address.
74         // For example, if assumedAlignment = 4 and maxAlignment = 16 then block can be placed at
75         // the following address offsets at the end of minimumSize bytes.
76         //   0 * minAlignment =  0
77         //   1 * minAlignment =  4
78         //   2 * minAlignment =  8
79         //   3 * minAlignment = 12
80         // Following this logic, the equation for the additional bytes is
81         //   (maxAlignment/minAlignment - 1) * minAlignment
82         //     = maxAlignment - minAlignment.
83         int minimumSize = SkToInt(AlignUp(requestedSize, minAlignment))
84                           + blockSize
85                           + maxAlignment - minAlignment;
86 
87         // If minimumSize is > 32k then round to a 4K boundary unless it is too close to the
88         // maximum int. The > 32K heuristic is from the JEMalloc behavior.
89         constexpr int k32K = (1 << 15);
90         if (minimumSize >= k32K && minimumSize < std::numeric_limits<int>::max() - k4K) {
91             minimumSize = SkToInt(AlignUp(minimumSize, k4K));
92         }
93 
94         return minimumSize;
95     }
96 
97     template <int size>
98     using Storage = std::array<char, PlatformMinimumSizeWithOverhead(size, 1)>;
99 
100     // Returns true if n * sizeof(T) will fit in an allocation block.
101     template <typename T>
WillCountFit(int n)102     static bool WillCountFit(int n) {
103         constexpr int kMaxN = kMaxByteSize / sizeof(T);
104         return 0 <= n && n < kMaxN;
105     }
106 
107     // Returns a pointer to memory suitable for holding n Ts.
108     template <typename T> char* allocateBytesFor(int n = 1) {
109         static_assert(alignof(T) <= kMaxAlignment, "Alignment is too big for arena");
110         static_assert(sizeof(T) < kMaxByteSize, "Size is too big for arena");
111         SkASSERT_RELEASE(WillCountFit<T>(n));
112 
113         int size = n ? n * sizeof(T) : 1;
114         return this->allocateBytes(size, alignof(T));
115     }
116 
117     void* alignedBytes(int unsafeSize, int unsafeAlignment);
118 
119 private:
120     // The maximum alignment supported by GrBagOfBytes. 16 seems to be a good number for alignment.
121     // If a use case for larger alignments is found, we can turn this into a template parameter.
122     inline static constexpr int kMaxAlignment = std::max(16, (int)alignof(std::max_align_t));
123     // The largest size that can be allocated. In larger sizes, the block is rounded up to 4K
124     // chunks. Leave a 4K of slop.
125     inline static constexpr int k4K = (1 << 12);
126     // This should never overflow with the calculations done on the code.
127     inline static constexpr int kMaxByteSize = std::numeric_limits<int>::max() - k4K;
128     // The assumed alignment of new char[] given the platform.
129     // There is a bug in Emscripten's allocator that make alignment different than max_align_t.
130     // kAllocationAlignment accounts for this difference. For more information see:
131     // https://github.com/emscripten-core/emscripten/issues/10072
132     #if !defined(SK_FORCE_8_BYTE_ALIGNMENT)
133         static constexpr int kAllocationAlignment = alignof(std::max_align_t);
134     #else
135         static constexpr int kAllocationAlignment = 8;
136     #endif
137 
AlignUp(int size,int alignment)138     static constexpr size_t AlignUp(int size, int alignment) {
139         return (size + (alignment - 1)) & -alignment;
140     }
141 
142     // The Block starts at the location pointed to by fEndByte.
143     // Beware. Order is important here. The destructor for fPrevious must be called first because
144     // the Block is embedded in fBlockStart. Destructors are run in reverse order.
145     struct Block {
146         Block(char* previous, char* startOfBlock);
147         // The start of the originally allocated bytes. This is the thing that must be deleted.
148         char* const fBlockStart;
149         Block* const fPrevious;
150     };
151 
152     // Note: fCapacity is the number of bytes remaining, and is subtracted from fEndByte to
153     // generate the location of the object.
allocateBytes(int size,int alignment)154     char* allocateBytes(int size, int alignment) {
155         fCapacity = fCapacity & -alignment;
156         if (fCapacity < size) {
157             this->needMoreBytes(size, alignment);
158         }
159         char* const ptr = fEndByte - fCapacity;
160         SkASSERT(((intptr_t)ptr & (alignment - 1)) == 0);
161         SkASSERT(fCapacity >= size);
162         fCapacity -= size;
163         return ptr;
164     }
165 
166     // Adjust fEndByte and fCapacity give a new block starting at bytes with size.
167     void setupBytesAndCapacity(char* bytes, int size);
168 
169     // Adjust fEndByte and fCapacity to satisfy the size and alignment request.
170     void needMoreBytes(int size, int alignment);
171 
172     // This points to the highest kMaxAlignment address in the allocated block. The address of
173     // the current end of allocated data is given by fEndByte - fCapacity. While the negative side
174     // of this pointer are the bytes to be allocated. The positive side points to the Block for
175     // this memory. In other words, the pointer to the Block structure for these bytes is
176     // reinterpret_cast<Block*>(fEndByte).
177     char* fEndByte{nullptr};
178 
179     // The number of bytes remaining in this block.
180     int fCapacity{0};
181 
182     SkFibBlockSizes<kMaxByteSize> fFibProgression;
183 };
185 template <typename T>
186 class SubRunInitializer {
187 public:
SubRunInitializer(void * memory)188     SubRunInitializer(void* memory) : fMemory{memory} { SkASSERT(memory != nullptr); }
~SubRunInitializer()189     ~SubRunInitializer() {
190         ::operator delete(fMemory);
191     }
192     template <typename... Args>
initialize(Args &&...args)193     T* initialize(Args&&... args) {
194         // Warn on more than one initialization.
195         SkASSERT(fMemory != nullptr);
196         return new (std::exchange(fMemory, nullptr)) T(std::forward<Args>(args)...);
197     }
198 
199 private:
200     void* fMemory;
201 };
203 // GrSubRunAllocator provides fast allocation where the user takes care of calling the destructors
204 // of the returned pointers, and GrSubRunAllocator takes care of deleting the storage. The
205 // unique_ptrs returned, are to assist in assuring the object's destructor is called.
206 // A note on zero length arrays: according to the standard a pointer must be returned, and it
207 // can't be a nullptr. In such a case, SkArena allocates one byte, but does not initialize it.
208 class SubRunAllocator {
209 public:
210     struct Destroyer {
211         template <typename T>
operatorDestroyer212         void operator()(T* ptr) { ptr->~T(); }
213     };
214 
215     struct ArrayDestroyer {
216         int n;
217         template <typename T>
operatorArrayDestroyer218         void operator()(T* ptr) {
219             for (int i = 0; i < n; i++) { ptr[i].~T(); }
220         }
221     };
222 
223     template<class T>
224     inline static constexpr bool HasNoDestructor = std::is_trivially_destructible<T>::value;
225 
226     SubRunAllocator(char* block, int blockSize, int firstHeapAllocation);
227     explicit SubRunAllocator(int firstHeapAllocation = 0);
228     SubRunAllocator(const SubRunAllocator&) = delete;
229     SubRunAllocator& operator=(const SubRunAllocator&) = delete;
230     SubRunAllocator(SubRunAllocator&&) = default;
231     SubRunAllocator& operator=(SubRunAllocator&&) = default;
232 
233     template <typename T>
234     static std::tuple<SubRunInitializer<T>, int, SubRunAllocator>
AllocateClassMemoryAndArena(int allocSizeHint)235     AllocateClassMemoryAndArena(int allocSizeHint) {
236         SkASSERT_RELEASE(allocSizeHint >= 0);
237         // Round the size after the object the optimal amount.
238         int extraSize = BagOfBytes::PlatformMinimumSizeWithOverhead(allocSizeHint, alignof(T));
239 
240         // Don't overflow or die.
241         SkASSERT_RELEASE(INT_MAX - SkTo<int>(sizeof(T)) > extraSize);
242         int totalMemorySize = sizeof(T) + extraSize;
243 
244         void* memory = ::operator new (totalMemorySize);
245         SubRunAllocator alloc{SkTAddOffset<char>(memory, sizeof(T)), extraSize, extraSize/2};
246         return {memory, totalMemorySize, std::move(alloc)};
247     }
248 
makePOD(Args &&...args)249     template <typename T, typename... Args> T* makePOD(Args&&... args) {
250         static_assert(HasNoDestructor<T>, "This is not POD. Use makeUnique.");
251         char* bytes = fAlloc.template allocateBytesFor<T>();
252         return new (bytes) T(std::forward<Args>(args)...);
253     }
254 
255     template <typename T, typename... Args>
makeUnique(Args &&...args)256     std::unique_ptr<T, Destroyer> makeUnique(Args&&... args) {
257         static_assert(!HasNoDestructor<T>, "This is POD. Use makePOD.");
258         char* bytes = fAlloc.template allocateBytesFor<T>();
259         return std::unique_ptr<T, Destroyer>{new (bytes) T(std::forward<Args>(args)...)};
260     }
261 
makePODArray(int n)262     template<typename T> T* makePODArray(int n) {
263         static_assert(HasNoDestructor<T>, "This is not POD. Use makeUniqueArray.");
264         return reinterpret_cast<T*>(fAlloc.template allocateBytesFor<T>(n));
265     }
266 
267     template<typename T>
makePODSpan(SkSpan<const T> s)268     SkSpan<T> makePODSpan(SkSpan<const T> s) {
269         static_assert(HasNoDestructor<T>, "This is not POD. Use makeUniqueArray.");
270         if (s.empty()) {
271             return SkSpan<T>{};
272         }
273 
274         T* result = this->makePODArray<T>(SkTo<int>(s.size()));
275         memcpy(result, s.data(), s.size_bytes());
276         return {result, s.size()};
277     }
278 
279     template<typename T, typename Src, typename Map>
makePODArray(const Src & src,Map map)280     SkSpan<T> makePODArray(const Src& src, Map map) {
281         static_assert(HasNoDestructor<T>, "This is not POD. Use makeUniqueArray.");
282         int size = SkTo<int>(src.size());
283         T* result = this->template makePODArray<T>(size);
284         for (int i = 0; i < size; i++) {
285             new (&result[i]) T(map(src[i]));
286         }
287         return {result, src.size()};
288     }
289 
290     template<typename T>
makeUniqueArray(int n)291     std::unique_ptr<T[], ArrayDestroyer> makeUniqueArray(int n) {
292         static_assert(!HasNoDestructor<T>, "This is POD. Use makePODArray.");
293         T* array = reinterpret_cast<T*>(fAlloc.template allocateBytesFor<T>(n));
294         for (int i = 0; i < n; i++) {
295             new (&array[i]) T{};
296         }
297         return std::unique_ptr<T[], ArrayDestroyer>{array, ArrayDestroyer{n}};
298     }
299 
300     template<typename T, typename I>
makeUniqueArray(int n,I initializer)301     std::unique_ptr<T[], ArrayDestroyer> makeUniqueArray(int n, I initializer) {
302         static_assert(!HasNoDestructor<T>, "This is POD. Use makePODArray.");
303         T* array = reinterpret_cast<T*>(fAlloc.template allocateBytesFor<T>(n));
304         for (int i = 0; i < n; i++) {
305             new (&array[i]) T(initializer(i));
306         }
307         return std::unique_ptr<T[], ArrayDestroyer>{array, ArrayDestroyer{n}};
308     }
309 
310     template<typename T, typename U, typename Map>
makeUniqueArray(SkSpan<const U> src,Map map)311     std::unique_ptr<T[], ArrayDestroyer> makeUniqueArray(SkSpan<const U> src, Map map) {
312         static_assert(!HasNoDestructor<T>, "This is POD. Use makePODArray.");
313         int count = SkCount(src);
314         T* array = reinterpret_cast<T*>(fAlloc.template allocateBytesFor<T>(src.size()));
315         for (int i = 0; i < count; ++i) {
316             new (&array[i]) T(map(src[i]));
317         }
318         return std::unique_ptr<T[], ArrayDestroyer>{array, ArrayDestroyer{count}};
319     }
320 
321     void* alignedBytes(int size, int alignment);
322 
323 private:
324     BagOfBytes fAlloc;
325 };
327 // Helper for defining allocators with inline/reserved storage.
328 // For argument declarations, stick to the base type (SubRunAllocator).
329 // Note: Inheriting from the storage first means the storage will outlive the
330 // SubRunAllocator, letting ~SubRunAllocator read it as it calls destructors.
331 // (This is mostly only relevant for strict tools like MSAN.)
332 template <size_t InlineStorageSize, size_t InlineStorageAlignment>
333 class STSubRunAllocator : private std::array<char,
334                                              BagOfBytes::PlatformMinimumSizeWithOverhead(
335                                                      InlineStorageSize, InlineStorageAlignment)>,
336                           public SubRunAllocator {
337 public:
338     explicit STSubRunAllocator(size_t firstHeapAllocation = InlineStorageSize)
339             : SubRunAllocator{this->data(), SkToInt(this->size()), SkToInt(firstHeapAllocation)} {}
340 };
}  // namespace sktext::gpu

#endif // sktext_gpu_SubRunAllocator_DEFINED