1 #pragma once 2 3 #include <cstddef> 4 #include <mutex> 5 6 #include <c10/macros/Export.h> 7 #include <c10/util/SmallVector.h> 8 #include <c10/util/flat_hash_map.h> 9 10 /* 11 * CPUCachingAllocator: 12 * DISCLAIMER: 13 * This is subject to change (beta) and only supported on mobile builds. 14 * If code snippet such as in 'Usage pattern' is used outside of mobile 15 * build you will not observe the intended behavior. 16 * See below for more information. 17 * Why? 18 * It has been observed that some mobile platforms, such as pixel 3, return 19 * memory aggressively to the system. This results in page faults in some 20 * cases and ends up hurting performance. This caching allocator aims to address 21 * that. Furthermore it also allows users to specify their own allocator by 22 * implementing allocate/free virtual interfaces. What are the cons? There are 23 * some cons that were observed where use of caching allocator led to worse 24 * performance on some platforms. Reason being that the caching mechanism used 25 * by this allocator left us worse off compared to the corresponding platform's 26 * tuned memory allocator. In that case it seemed better to not use this 27 * allocator. Note there are some ideas to fix this in the works. 28 * 29 * Usage: 30 * Usage pattern: 31 * Instantiate and own the caching allocator. 32 * std::unique_ptr<c10::CPUCachingAllocator> caching_allocator = 33 * std::make_unique<c10::CPUCachingAllocator>(); 34 * Use caching allocator with a scoped guard at inference time. 35 * { 36 * WithCPUCachingAllocatorGuard(caching_allocator.get()); 37 * ... model.forward(...); 38 * } 39 */ 40 41 namespace c10 { 42 43 class C10_API CPUCachingAllocator { 44 /* 45 * What it does: 46 * Caches all the allocations carried out by this allocator. 47 * Cache key is the size of the allocation. 48 * If requested size is found in the cache returns the cached pointer. 49 * What it does not do: 50 * No speculative allocation for any future allocations. 51 */ 52 private: 53 inline void* allocate_and_cache(const size_t bytes); 54 void free_cached(); 55 56 protected: 57 // Invariants. 58 // 1. If memory is ever allocated via this allocator then 59 // the pointer will exist in allocation_map_, unless the allocator 60 // returned the memory to OS via free_cached. 61 // 1.1. Therefore even when the said memory is "freed" via this 62 // allocator (and thus cached), it will continue to stay 63 // in allocation_map_. Furthermore it will also exist in 64 // available_map_. Thus an allocated memory pointer can be in both 65 // allocation_map_ and available_map_ simultaneously. 66 // 2. Memory pointer maybe removed from allocation_map_, when it 67 // is freed outside of the scope of this allocator, but was allocated 68 // by this allocator. 69 // 3. Available map only contains that memory which was allocated 70 // by this allocator and subsequently freed by this allocator. 71 // As a result of above invariants, allocated memory ptr cannot be in 72 // available_map_ unless it is in allocation_map_ as well. 73 ska::flat_hash_map<size_t, c10::SmallVector<void*, 16>> available_map_; 74 static ska::flat_hash_map<void*, size_t> allocation_map_; 75 // Since allocation_map, which is a global instance, is mutated/read via 76 // all public APIs we need a global mutex. 77 static std::mutex mutex_; 78 79 public: 80 static void record_free(void* ptr); 81 virtual ~CPUCachingAllocator(); 82 // Checks the cache to see if allocation of size bytes can be found. 83 // If so return cached memory, else 84 // allocates memory, records it for caching and returns. 85 virtual void* allocate(const size_t bytes); 86 // Checks if the memory being freed is was marked for allocation by 87 // an earlier call to allocate. If so cache the allocation. 88 // Otherwise free. 89 virtual void free(void* ptr); 90 }; 91 92 CPUCachingAllocator* GetDefaultCPUCachingAllocator(); 93 94 bool ThreadLocalCachingAllocatorEnabled(); 95 CPUCachingAllocator* GetThreadLocalCachingAllocator(); 96 97 class C10_API WithCPUCachingAllocatorGuard { 98 public: 99 WithCPUCachingAllocatorGuard(CPUCachingAllocator* allocator); 100 ~WithCPUCachingAllocatorGuard(); 101 102 private: 103 CPUCachingAllocator* prev_caching_allocator_ptr_{nullptr}; 104 }; 105 106 } // namespace c10 107