/*
 * Copyright 2021 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef sktext_gpu_SubRunAllocator_DEFINED
#define sktext_gpu_SubRunAllocator_DEFINED

#include "include/core/SkSpan.h"
#include "include/private/base/SkAssert.h"
#include "include/private/base/SkMath.h"
#include "include/private/base/SkTLogic.h"
#include "include/private/base/SkTemplates.h"
#include "include/private/base/SkTo.h"
#include "src/base/SkArenaAlloc.h"

#include <algorithm>
#include <array>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <limits>
#include <memory>
#include <new>
#include <tuple>
#include <utility>

namespace sktext::gpu {

// BagOfBytes parcels out bytes with a given size and alignment. Blocks are heap-allocated on
// demand (sizes grow following fFibProgression) and chained through the embedded Block structs
// so the destructor can free them all.
class BagOfBytes {
public:
    BagOfBytes(char* block, size_t blockSize, size_t firstHeapAllocation);
    explicit BagOfBytes(size_t firstHeapAllocation = 0);
    BagOfBytes(const BagOfBytes&) = delete;
    BagOfBytes& operator=(const BagOfBytes&) = delete;
    // Move leaves `that` empty (fEndByte == nullptr) so its destructor frees nothing.
    BagOfBytes(BagOfBytes&& that)
            : fEndByte{std::exchange(that.fEndByte, nullptr)}
            , fCapacity{that.fCapacity}
            , fFibProgression{that.fFibProgression} {}
    // Move-assign via destroy + placement-new move-construct; safe for self-move-free callers.
    BagOfBytes& operator=(BagOfBytes&& that) {
        this->~BagOfBytes();
        new (this) BagOfBytes{std::move(that)};
        return *this;
    }

    ~BagOfBytes();

    // Given a requestedSize round up to the smallest size that accounts for all the per block
    // overhead and alignment. It crashes if requestedSize is negative or too big.
    static constexpr int PlatformMinimumSizeWithOverhead(int requestedSize, int assumedAlignment) {
        return MinimumSizeWithOverhead(
                requestedSize, assumedAlignment, sizeof(Block), kMaxAlignment);
    }

    // Smallest allocation that can hold requestedSize bytes plus a trailing Block header,
    // when the underlying storage is only guaranteed to be assumedAlignment-aligned.
    static constexpr int MinimumSizeWithOverhead(
            int requestedSize, int assumedAlignment, int blockSize, int maxAlignment) {
        SkASSERT_RELEASE(0 <= requestedSize && requestedSize < kMaxByteSize);
        SkASSERT_RELEASE(SkIsPow2(assumedAlignment) && SkIsPow2(maxAlignment));

        const int minAlignment = std::min(maxAlignment, assumedAlignment);
        // There are two cases, one easy and one subtle. The easy case is when minAlignment ==
        // maxAlignment. When that happens, the term maxAlignment - minAlignment is zero, and the
        // block will be placed at the proper alignment because alignUp is properly
        // aligned.
        // The subtle case is where minAlignment < maxAlignment. Because
        // minAlignment < maxAlignment, alignUp(requestedSize, minAlignment) + blockSize does not
        // guarantee that block can be placed at a maxAlignment address. Block can be placed at
        // maxAlignment/minAlignment different address to achieve alignment, so we need
        // to add memory to allow the block to be placed on a maxAlignment address.
        // For example, if assumedAlignment = 4 and maxAlignment = 16 then block can be placed at
        // the following address offsets at the end of minimumSize bytes.
        //     0 * minAlignment = 0
        //     1 * minAlignment = 4
        //     2 * minAlignment = 8
        //     3 * minAlignment = 12
        // Following this logic, the equation for the additional bytes is
        //   (maxAlignment/minAlignment - 1) * minAlignment
        //     = maxAlignment - minAlignment.
        int minimumSize = SkToInt(AlignUp(requestedSize, minAlignment))
                          + blockSize
                          + maxAlignment - minAlignment;

        // If minimumSize is > 32k then round to a 4K boundary unless it is too close to the
        // maximum int. The > 32K heuristic is from the JEMalloc behavior.
        constexpr int k32K = (1 << 15);
        if (minimumSize >= k32K && minimumSize < std::numeric_limits<int>::max() - k4K) {
            minimumSize = SkToInt(AlignUp(minimumSize, k4K));
        }

        return minimumSize;
    }

    // Inline storage big enough for `size` user bytes plus all block overhead.
    template <int size>
    using Storage = std::array<char, PlatformMinimumSizeWithOverhead(size, 1)>;

    // Returns true if n * sizeof(T) will fit in an allocation block.
    template <typename T>
    static bool WillCountFit(int n) {
        constexpr int kMaxN = kMaxByteSize / sizeof(T);
        return 0 <= n && n < kMaxN;
    }

    // Returns a pointer to memory suitable for holding n Ts.
    // Note: n == 0 still allocates one byte so a non-null, unique pointer can be returned.
    template <typename T> char* allocateBytesFor(int n = 1) {
        static_assert(alignof(T) <= kMaxAlignment, "Alignment is too big for arena");
        static_assert(sizeof(T) < kMaxByteSize, "Size is too big for arena");
        SkASSERT_RELEASE(WillCountFit<T>(n));

        int size = n ? n * sizeof(T) : 1;
        return this->allocateBytes(size, alignof(T));
    }

    void* alignedBytes(int unsafeSize, int unsafeAlignment);

private:
    // The maximum alignment supported by GrBagOfBytes. 16 seems to be a good number for alignment.
    // If a use case for larger alignments is found, we can turn this into a template parameter.
    inline static constexpr int kMaxAlignment = std::max(16, (int)alignof(std::max_align_t));
    // The largest size that can be allocated. In larger sizes, the block is rounded up to 4K
    // chunks. Leave a 4K of slop.
    inline static constexpr int k4K = (1 << 12);
    // This should never overflow with the calculations done on the code.
    inline static constexpr int kMaxByteSize = std::numeric_limits<int>::max() - k4K;
    // The assumed alignment of new char[] given the platform.
    // There is a bug in Emscripten's allocator that make alignment different than max_align_t.
    // kAllocationAlignment accounts for this difference. For more information see:
    // https://github.com/emscripten-core/emscripten/issues/10072
#if !defined(SK_FORCE_8_BYTE_ALIGNMENT)
    static constexpr int kAllocationAlignment = alignof(std::max_align_t);
#else
    static constexpr int kAllocationAlignment = 8;
#endif

    // Round `size` up to a multiple of `alignment` (alignment must be a power of two).
    static constexpr size_t AlignUp(int size, int alignment) {
        return (size + (alignment - 1)) & -alignment;
    }

    // The Block starts at the location pointed to by fEndByte.
    // Beware. Order is important here. The destructor for fPrevious must be called first because
    // the Block is embedded in fBlockStart. Destructors are run in reverse order.
    struct Block {
        Block(char* previous, char* startOfBlock);
        // The start of the originally allocated bytes. This is the thing that must be deleted.
        char* const fBlockStart;
        Block* const fPrevious;
    };

    // Note: fCapacity is the number of bytes remaining, and is subtracted from fEndByte to
    // generate the location of the object.
    char* allocateBytes(int size, int alignment) {
        // Rounding remaining capacity DOWN to the alignment rounds the allocation address UP,
        // because the address is fEndByte - fCapacity and fEndByte is kMaxAlignment-aligned.
        fCapacity = fCapacity & -alignment;
        if (fCapacity < size) {
            this->needMoreBytes(size, alignment);
        }
        char* const ptr = fEndByte - fCapacity;
        SkASSERT(((intptr_t)ptr & (alignment - 1)) == 0);
        SkASSERT(fCapacity >= size);
        fCapacity -= size;
        return ptr;
    }

    // Adjust fEndByte and fCapacity give a new block starting at bytes with size.
    void setupBytesAndCapacity(char* bytes, int size);

    // Adjust fEndByte and fCapacity to satisfy the size and alignment request.
    void needMoreBytes(int size, int alignment);

    // This points to the highest kMaxAlignment address in the allocated block. The address of
    // the current end of allocated data is given by fEndByte - fCapacity. While the negative side
    // of this pointer are the bytes to be allocated. The positive side points to the Block for
    // this memory. In other words, the pointer to the Block structure for these bytes is
    // reinterpret_cast<Block*>(fEndByte).
    char* fEndByte{nullptr};

    // The number of bytes remaining in this block.
    int fCapacity{0};

    // Produces the growing (Fibonacci) sequence of block sizes for new heap blocks.
    SkFibBlockSizes<kMaxByteSize> fFibProgression;
};

// Owns raw, uninitialized memory for a T until initialize() is called; if initialize() is never
// called, the destructor releases the memory (via ::operator delete) without running ~T.
template <typename T>
class SubRunInitializer {
public:
    SubRunInitializer(void* memory) : fMemory{memory} { SkASSERT(memory != nullptr); }
    ~SubRunInitializer() {
        // Deleting nullptr is a no-op, so this is safe after initialize() has transferred
        // ownership of the memory to the constructed T.
        ::operator delete(fMemory);
    }
    // Placement-construct the T in the held memory and release ownership of it.
    template <typename... Args>
    T* initialize(Args&&... args) {
        // Warn on more than one initialization.
        SkASSERT(fMemory != nullptr);
        return new (std::exchange(fMemory, nullptr)) T(std::forward<Args>(args)...);
    }

private:
    void* fMemory;
};

// GrSubRunAllocator provides fast allocation where the user takes care of calling the destructors
// of the returned pointers, and GrSubRunAllocator takes care of deleting the storage. The
// unique_ptrs returned, are to assist in assuring the object's destructor is called.
// A note on zero length arrays: according to the standard a pointer must be returned, and it
// can't be a nullptr. In such a case, SkArena allocates one byte, but does not initialize it.
208 class SubRunAllocator { 209 public: 210 struct Destroyer { 211 template <typename T> operatorDestroyer212 void operator()(T* ptr) { ptr->~T(); } 213 }; 214 215 struct ArrayDestroyer { 216 int n; 217 template <typename T> operatorArrayDestroyer218 void operator()(T* ptr) { 219 for (int i = 0; i < n; i++) { ptr[i].~T(); } 220 } 221 }; 222 223 template<class T> 224 inline static constexpr bool HasNoDestructor = std::is_trivially_destructible<T>::value; 225 226 SubRunAllocator(char* block, int blockSize, int firstHeapAllocation); 227 explicit SubRunAllocator(int firstHeapAllocation = 0); 228 SubRunAllocator(const SubRunAllocator&) = delete; 229 SubRunAllocator& operator=(const SubRunAllocator&) = delete; 230 SubRunAllocator(SubRunAllocator&&) = default; 231 SubRunAllocator& operator=(SubRunAllocator&&) = default; 232 233 template <typename T> 234 static std::tuple<SubRunInitializer<T>, int, SubRunAllocator> AllocateClassMemoryAndArena(int allocSizeHint)235 AllocateClassMemoryAndArena(int allocSizeHint) { 236 SkASSERT_RELEASE(allocSizeHint >= 0); 237 // Round the size after the object the optimal amount. 238 int extraSize = BagOfBytes::PlatformMinimumSizeWithOverhead(allocSizeHint, alignof(T)); 239 240 // Don't overflow or die. 241 SkASSERT_RELEASE(INT_MAX - SkTo<int>(sizeof(T)) > extraSize); 242 int totalMemorySize = sizeof(T) + extraSize; 243 244 void* memory = ::operator new (totalMemorySize); 245 SubRunAllocator alloc{SkTAddOffset<char>(memory, sizeof(T)), extraSize, extraSize/2}; 246 return {memory, totalMemorySize, std::move(alloc)}; 247 } 248 makePOD(Args &&...args)249 template <typename T, typename... Args> T* makePOD(Args&&... args) { 250 static_assert(HasNoDestructor<T>, "This is not POD. Use makeUnique."); 251 char* bytes = fAlloc.template allocateBytesFor<T>(); 252 return new (bytes) T(std::forward<Args>(args)...); 253 } 254 255 template <typename T, typename... Args> makeUnique(Args &&...args)256 std::unique_ptr<T, Destroyer> makeUnique(Args&&... 
args) { 257 static_assert(!HasNoDestructor<T>, "This is POD. Use makePOD."); 258 char* bytes = fAlloc.template allocateBytesFor<T>(); 259 return std::unique_ptr<T, Destroyer>{new (bytes) T(std::forward<Args>(args)...)}; 260 } 261 makePODArray(int n)262 template<typename T> T* makePODArray(int n) { 263 static_assert(HasNoDestructor<T>, "This is not POD. Use makeUniqueArray."); 264 return reinterpret_cast<T*>(fAlloc.template allocateBytesFor<T>(n)); 265 } 266 267 template<typename T> makePODSpan(SkSpan<const T> s)268 SkSpan<T> makePODSpan(SkSpan<const T> s) { 269 static_assert(HasNoDestructor<T>, "This is not POD. Use makeUniqueArray."); 270 if (s.empty()) { 271 return SkSpan<T>{}; 272 } 273 274 T* result = this->makePODArray<T>(SkTo<int>(s.size())); 275 memcpy(result, s.data(), s.size_bytes()); 276 return {result, s.size()}; 277 } 278 279 template<typename T, typename Src, typename Map> makePODArray(const Src & src,Map map)280 SkSpan<T> makePODArray(const Src& src, Map map) { 281 static_assert(HasNoDestructor<T>, "This is not POD. Use makeUniqueArray."); 282 int size = SkTo<int>(src.size()); 283 T* result = this->template makePODArray<T>(size); 284 for (int i = 0; i < size; i++) { 285 new (&result[i]) T(map(src[i])); 286 } 287 return {result, src.size()}; 288 } 289 290 template<typename T> makeUniqueArray(int n)291 std::unique_ptr<T[], ArrayDestroyer> makeUniqueArray(int n) { 292 static_assert(!HasNoDestructor<T>, "This is POD. Use makePODArray."); 293 T* array = reinterpret_cast<T*>(fAlloc.template allocateBytesFor<T>(n)); 294 for (int i = 0; i < n; i++) { 295 new (&array[i]) T{}; 296 } 297 return std::unique_ptr<T[], ArrayDestroyer>{array, ArrayDestroyer{n}}; 298 } 299 300 template<typename T, typename I> makeUniqueArray(int n,I initializer)301 std::unique_ptr<T[], ArrayDestroyer> makeUniqueArray(int n, I initializer) { 302 static_assert(!HasNoDestructor<T>, "This is POD. 
Use makePODArray."); 303 T* array = reinterpret_cast<T*>(fAlloc.template allocateBytesFor<T>(n)); 304 for (int i = 0; i < n; i++) { 305 new (&array[i]) T(initializer(i)); 306 } 307 return std::unique_ptr<T[], ArrayDestroyer>{array, ArrayDestroyer{n}}; 308 } 309 310 template<typename T, typename U, typename Map> makeUniqueArray(SkSpan<const U> src,Map map)311 std::unique_ptr<T[], ArrayDestroyer> makeUniqueArray(SkSpan<const U> src, Map map) { 312 static_assert(!HasNoDestructor<T>, "This is POD. Use makePODArray."); 313 int count = SkCount(src); 314 T* array = reinterpret_cast<T*>(fAlloc.template allocateBytesFor<T>(src.size())); 315 for (int i = 0; i < count; ++i) { 316 new (&array[i]) T(map(src[i])); 317 } 318 return std::unique_ptr<T[], ArrayDestroyer>{array, ArrayDestroyer{count}}; 319 } 320 321 void* alignedBytes(int size, int alignment); 322 323 private: 324 BagOfBytes fAlloc; 325 }; 326 327 // Helper for defining allocators with inline/reserved storage. 328 // For argument declarations, stick to the base type (SubRunAllocator). 329 // Note: Inheriting from the storage first means the storage will outlive the 330 // SubRunAllocator, letting ~SubRunAllocator read it as it calls destructors. 331 // (This is mostly only relevant for strict tools like MSAN.) 332 template <size_t InlineStorageSize, size_t InlineStorageAlignment> 333 class STSubRunAllocator : private std::array<char, 334 BagOfBytes::PlatformMinimumSizeWithOverhead( 335 InlineStorageSize, InlineStorageAlignment)>, 336 public SubRunAllocator { 337 public: 338 explicit STSubRunAllocator(size_t firstHeapAllocation = InlineStorageSize) 339 : SubRunAllocator{this->data(), SkToInt(this->size()), SkToInt(firstHeapAllocation)} {} 340 }; 341 } // namespace sktext::gpu 342 343 #endif // sktext_gpu_SubRunAllocator_DEFINED 344