1*da0073e9SAndroid Build Coastguard Worker #pragma once 2*da0073e9SAndroid Build Coastguard Worker 3*da0073e9SAndroid Build Coastguard Worker #include <cstddef> 4*da0073e9SAndroid Build Coastguard Worker #include <mutex> 5*da0073e9SAndroid Build Coastguard Worker 6*da0073e9SAndroid Build Coastguard Worker #include <c10/macros/Export.h> 7*da0073e9SAndroid Build Coastguard Worker #include <c10/util/SmallVector.h> 8*da0073e9SAndroid Build Coastguard Worker #include <c10/util/flat_hash_map.h> 9*da0073e9SAndroid Build Coastguard Worker 10*da0073e9SAndroid Build Coastguard Worker /* 11*da0073e9SAndroid Build Coastguard Worker * CPUCachingAllocator: 12*da0073e9SAndroid Build Coastguard Worker * DISCLAIMER: 13*da0073e9SAndroid Build Coastguard Worker * This is subject to change (beta) and only supported on mobile builds. 14*da0073e9SAndroid Build Coastguard Worker * If code snippet such as in 'Usage pattern' is used outside of mobile 15*da0073e9SAndroid Build Coastguard Worker * build you will not observe the intended behavior. 16*da0073e9SAndroid Build Coastguard Worker * See below for more information. 17*da0073e9SAndroid Build Coastguard Worker * Why? 18*da0073e9SAndroid Build Coastguard Worker * It has been observed that some mobile platforms, such as pixel 3, return 19*da0073e9SAndroid Build Coastguard Worker * memory aggressively to the system. This results in page faults in some 20*da0073e9SAndroid Build Coastguard Worker * cases and ends up hurting performance. This caching allocator aims to address 21*da0073e9SAndroid Build Coastguard Worker * that. Furthermore it also allows users to specify their own allocator by 22*da0073e9SAndroid Build Coastguard Worker * implementing allocate/free virtual interfaces. What are the cons? There are 23*da0073e9SAndroid Build Coastguard Worker * some cons that were observed where use of caching allocator led to worse 24*da0073e9SAndroid Build Coastguard Worker * performance on some platforms. Reason being that the caching mechanism used 25*da0073e9SAndroid Build Coastguard Worker * by this allocator left us worse off compared to the corresponding platform's 26*da0073e9SAndroid Build Coastguard Worker * tuned memory allocator. In that case it seemed better to not use this 27*da0073e9SAndroid Build Coastguard Worker * allocator. Note there are some ideas to fix this in the works. 28*da0073e9SAndroid Build Coastguard Worker * 29*da0073e9SAndroid Build Coastguard Worker * Usage: 30*da0073e9SAndroid Build Coastguard Worker * Usage pattern: 31*da0073e9SAndroid Build Coastguard Worker * Instantiate and own the caching allocator. 32*da0073e9SAndroid Build Coastguard Worker * std::unique_ptr<c10::CPUCachingAllocator> caching_allocator = 33*da0073e9SAndroid Build Coastguard Worker * std::make_unique<c10::CPUCachingAllocator>(); 34*da0073e9SAndroid Build Coastguard Worker * Use caching allocator with a scoped guard at inference time. 35*da0073e9SAndroid Build Coastguard Worker * { 36*da0073e9SAndroid Build Coastguard Worker * WithCPUCachingAllocatorGuard(caching_allocator.get()); 37*da0073e9SAndroid Build Coastguard Worker * ... model.forward(...); 38*da0073e9SAndroid Build Coastguard Worker * } 39*da0073e9SAndroid Build Coastguard Worker */ 40*da0073e9SAndroid Build Coastguard Worker 41*da0073e9SAndroid Build Coastguard Worker namespace c10 { 42*da0073e9SAndroid Build Coastguard Worker 43*da0073e9SAndroid Build Coastguard Worker class C10_API CPUCachingAllocator { 44*da0073e9SAndroid Build Coastguard Worker /* 45*da0073e9SAndroid Build Coastguard Worker * What it does: 46*da0073e9SAndroid Build Coastguard Worker * Caches all the allocations carried out by this allocator. 47*da0073e9SAndroid Build Coastguard Worker * Cache key is the size of the allocation. 48*da0073e9SAndroid Build Coastguard Worker * If requested size is found in the cache returns the cached pointer. 49*da0073e9SAndroid Build Coastguard Worker * What it does not do: 50*da0073e9SAndroid Build Coastguard Worker * No speculative allocation for any future allocations. 51*da0073e9SAndroid Build Coastguard Worker */ 52*da0073e9SAndroid Build Coastguard Worker private: 53*da0073e9SAndroid Build Coastguard Worker inline void* allocate_and_cache(const size_t bytes); 54*da0073e9SAndroid Build Coastguard Worker void free_cached(); 55*da0073e9SAndroid Build Coastguard Worker 56*da0073e9SAndroid Build Coastguard Worker protected: 57*da0073e9SAndroid Build Coastguard Worker // Invariants. 58*da0073e9SAndroid Build Coastguard Worker // 1. If memory is ever allocated via this allocator then 59*da0073e9SAndroid Build Coastguard Worker // the pointer will exist in allocation_map_, unless the allocator 60*da0073e9SAndroid Build Coastguard Worker // returned the memory to OS via free_cached. 61*da0073e9SAndroid Build Coastguard Worker // 1.1. Therefore even when the said memory is "freed" via this 62*da0073e9SAndroid Build Coastguard Worker // allocator (and thus cached), it will continue to stay 63*da0073e9SAndroid Build Coastguard Worker // in allocation_map_. Furthermore it will also exist in 64*da0073e9SAndroid Build Coastguard Worker // available_map_. Thus an allocated memory pointer can be in both 65*da0073e9SAndroid Build Coastguard Worker // allocation_map_ and available_map_ simultaneously. 66*da0073e9SAndroid Build Coastguard Worker // 2. Memory pointer maybe removed from allocation_map_, when it 67*da0073e9SAndroid Build Coastguard Worker // is freed outside of the scope of this allocator, but was allocated 68*da0073e9SAndroid Build Coastguard Worker // by this allocator. 69*da0073e9SAndroid Build Coastguard Worker // 3. Available map only contains that memory which was allocated 70*da0073e9SAndroid Build Coastguard Worker // by this allocator and subsequently freed by this allocator. 71*da0073e9SAndroid Build Coastguard Worker // As a result of above invariants, allocated memory ptr cannot be in 72*da0073e9SAndroid Build Coastguard Worker // available_map_ unless it is in allocation_map_ as well. 73*da0073e9SAndroid Build Coastguard Worker ska::flat_hash_map<size_t, c10::SmallVector<void*, 16>> available_map_; 74*da0073e9SAndroid Build Coastguard Worker static ska::flat_hash_map<void*, size_t> allocation_map_; 75*da0073e9SAndroid Build Coastguard Worker // Since allocation_map, which is a global instance, is mutated/read via 76*da0073e9SAndroid Build Coastguard Worker // all public APIs we need a global mutex. 77*da0073e9SAndroid Build Coastguard Worker static std::mutex mutex_; 78*da0073e9SAndroid Build Coastguard Worker 79*da0073e9SAndroid Build Coastguard Worker public: 80*da0073e9SAndroid Build Coastguard Worker static void record_free(void* ptr); 81*da0073e9SAndroid Build Coastguard Worker virtual ~CPUCachingAllocator(); 82*da0073e9SAndroid Build Coastguard Worker // Checks the cache to see if allocation of size bytes can be found. 83*da0073e9SAndroid Build Coastguard Worker // If so return cached memory, else 84*da0073e9SAndroid Build Coastguard Worker // allocates memory, records it for caching and returns. 85*da0073e9SAndroid Build Coastguard Worker virtual void* allocate(const size_t bytes); 86*da0073e9SAndroid Build Coastguard Worker // Checks if the memory being freed is was marked for allocation by 87*da0073e9SAndroid Build Coastguard Worker // an earlier call to allocate. If so cache the allocation. 88*da0073e9SAndroid Build Coastguard Worker // Otherwise free. 89*da0073e9SAndroid Build Coastguard Worker virtual void free(void* ptr); 90*da0073e9SAndroid Build Coastguard Worker }; 91*da0073e9SAndroid Build Coastguard Worker 92*da0073e9SAndroid Build Coastguard Worker CPUCachingAllocator* GetDefaultCPUCachingAllocator(); 93*da0073e9SAndroid Build Coastguard Worker 94*da0073e9SAndroid Build Coastguard Worker bool ThreadLocalCachingAllocatorEnabled(); 95*da0073e9SAndroid Build Coastguard Worker CPUCachingAllocator* GetThreadLocalCachingAllocator(); 96*da0073e9SAndroid Build Coastguard Worker 97*da0073e9SAndroid Build Coastguard Worker class C10_API WithCPUCachingAllocatorGuard { 98*da0073e9SAndroid Build Coastguard Worker public: 99*da0073e9SAndroid Build Coastguard Worker WithCPUCachingAllocatorGuard(CPUCachingAllocator* allocator); 100*da0073e9SAndroid Build Coastguard Worker ~WithCPUCachingAllocatorGuard(); 101*da0073e9SAndroid Build Coastguard Worker 102*da0073e9SAndroid Build Coastguard Worker private: 103*da0073e9SAndroid Build Coastguard Worker CPUCachingAllocator* prev_caching_allocator_ptr_{nullptr}; 104*da0073e9SAndroid Build Coastguard Worker }; 105*da0073e9SAndroid Build Coastguard Worker 106*da0073e9SAndroid Build Coastguard Worker } // namespace c10 107