xref: /aosp_15_r20/external/pytorch/c10/mobile/CPUCachingAllocator.h (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1*da0073e9SAndroid Build Coastguard Worker #pragma once
2*da0073e9SAndroid Build Coastguard Worker 
3*da0073e9SAndroid Build Coastguard Worker #include <cstddef>
4*da0073e9SAndroid Build Coastguard Worker #include <mutex>
5*da0073e9SAndroid Build Coastguard Worker 
6*da0073e9SAndroid Build Coastguard Worker #include <c10/macros/Export.h>
7*da0073e9SAndroid Build Coastguard Worker #include <c10/util/SmallVector.h>
8*da0073e9SAndroid Build Coastguard Worker #include <c10/util/flat_hash_map.h>
9*da0073e9SAndroid Build Coastguard Worker 
10*da0073e9SAndroid Build Coastguard Worker /*
11*da0073e9SAndroid Build Coastguard Worker  * CPUCachingAllocator:
12*da0073e9SAndroid Build Coastguard Worker  * DISCLAIMER:
13*da0073e9SAndroid Build Coastguard Worker  *    This is subject to change (beta) and only supported on mobile builds.
14*da0073e9SAndroid Build Coastguard Worker  *    If code snippet such as in 'Usage pattern' is used outside of mobile
15*da0073e9SAndroid Build Coastguard Worker  *    build you will not observe the intended behavior.
16*da0073e9SAndroid Build Coastguard Worker  *    See below for more information.
 * Why?
 *    It has been observed that some mobile platforms, such as Pixel 3, return
 *    memory aggressively to the system. This results in page faults in some
 *    cases and ends up hurting performance. This caching allocator aims to
 *    address that. Furthermore, it also allows users to specify their own
 *    allocator by implementing the allocate/free virtual interfaces.
 * What are the cons?
 *    In some cases use of the caching allocator led to worse performance on
 *    some platforms, because the caching mechanism used by this allocator
 *    left us worse off compared to the corresponding platform's tuned memory
 *    allocator. In that case it seemed better to not use this allocator.
 *    Note there are some ideas to fix this in the works.
28*da0073e9SAndroid Build Coastguard Worker  *
 * Usage pattern:
 *    Instantiate and own the caching allocator.
 *      std::unique_ptr<c10::CPUCachingAllocator> caching_allocator =
 *        std::make_unique<c10::CPUCachingAllocator>();
 *    Use the caching allocator with a scoped guard at inference time. The
 *    guard must be a named variable: an unnamed temporary would be destroyed
 *    at the end of its own statement, before model.forward(...) runs.
 *      {
 *        c10::WithCPUCachingAllocatorGuard guard(caching_allocator.get());
 *        ... model.forward(...);
 *      }
39*da0073e9SAndroid Build Coastguard Worker  */
40*da0073e9SAndroid Build Coastguard Worker 
41*da0073e9SAndroid Build Coastguard Worker namespace c10 {
42*da0073e9SAndroid Build Coastguard Worker 
43*da0073e9SAndroid Build Coastguard Worker class C10_API CPUCachingAllocator {
44*da0073e9SAndroid Build Coastguard Worker   /*
45*da0073e9SAndroid Build Coastguard Worker    * What it does:
46*da0073e9SAndroid Build Coastguard Worker    * Caches all the allocations carried out by this allocator.
47*da0073e9SAndroid Build Coastguard Worker    * Cache key is the size of the allocation.
48*da0073e9SAndroid Build Coastguard Worker    * If requested size is found in the cache returns the cached pointer.
49*da0073e9SAndroid Build Coastguard Worker    * What it does not do:
50*da0073e9SAndroid Build Coastguard Worker    * No speculative allocation for any future allocations.
51*da0073e9SAndroid Build Coastguard Worker    */
52*da0073e9SAndroid Build Coastguard Worker  private:
53*da0073e9SAndroid Build Coastguard Worker   inline void* allocate_and_cache(const size_t bytes);
54*da0073e9SAndroid Build Coastguard Worker   void free_cached();
55*da0073e9SAndroid Build Coastguard Worker 
56*da0073e9SAndroid Build Coastguard Worker  protected:
57*da0073e9SAndroid Build Coastguard Worker   // Invariants.
58*da0073e9SAndroid Build Coastguard Worker   // 1. If memory is ever allocated via this allocator then
59*da0073e9SAndroid Build Coastguard Worker   //    the pointer will exist in allocation_map_, unless the allocator
60*da0073e9SAndroid Build Coastguard Worker   //    returned the memory to OS via free_cached.
61*da0073e9SAndroid Build Coastguard Worker   //  1.1. Therefore even when the said memory is "freed" via this
62*da0073e9SAndroid Build Coastguard Worker   //       allocator (and thus cached), it will continue to stay
63*da0073e9SAndroid Build Coastguard Worker   //       in allocation_map_. Furthermore it will also exist in
64*da0073e9SAndroid Build Coastguard Worker   //       available_map_. Thus an allocated memory pointer can be in both
65*da0073e9SAndroid Build Coastguard Worker   //       allocation_map_ and available_map_ simultaneously.
66*da0073e9SAndroid Build Coastguard Worker   // 2. Memory pointer maybe removed from allocation_map_, when it
67*da0073e9SAndroid Build Coastguard Worker   //    is freed outside of the scope of this allocator, but was allocated
68*da0073e9SAndroid Build Coastguard Worker   //    by this allocator.
69*da0073e9SAndroid Build Coastguard Worker   // 3. Available map only contains that memory which was allocated
70*da0073e9SAndroid Build Coastguard Worker   //    by this allocator and subsequently freed by this allocator.
71*da0073e9SAndroid Build Coastguard Worker   // As a result of above invariants, allocated memory ptr cannot be in
72*da0073e9SAndroid Build Coastguard Worker   // available_map_ unless it is in allocation_map_ as well.
73*da0073e9SAndroid Build Coastguard Worker   ska::flat_hash_map<size_t, c10::SmallVector<void*, 16>> available_map_;
74*da0073e9SAndroid Build Coastguard Worker   static ska::flat_hash_map<void*, size_t> allocation_map_;
75*da0073e9SAndroid Build Coastguard Worker   // Since allocation_map, which is a global instance, is mutated/read via
76*da0073e9SAndroid Build Coastguard Worker   // all public APIs we need a global mutex.
77*da0073e9SAndroid Build Coastguard Worker   static std::mutex mutex_;
78*da0073e9SAndroid Build Coastguard Worker 
79*da0073e9SAndroid Build Coastguard Worker  public:
80*da0073e9SAndroid Build Coastguard Worker   static void record_free(void* ptr);
81*da0073e9SAndroid Build Coastguard Worker   virtual ~CPUCachingAllocator();
82*da0073e9SAndroid Build Coastguard Worker   // Checks the cache to see if allocation of size bytes can be found.
83*da0073e9SAndroid Build Coastguard Worker   // If so return cached memory, else
84*da0073e9SAndroid Build Coastguard Worker   // allocates memory, records it for caching and returns.
85*da0073e9SAndroid Build Coastguard Worker   virtual void* allocate(const size_t bytes);
86*da0073e9SAndroid Build Coastguard Worker   // Checks if the memory being freed is was marked for allocation by
87*da0073e9SAndroid Build Coastguard Worker   // an earlier call to allocate. If so cache the allocation.
88*da0073e9SAndroid Build Coastguard Worker   // Otherwise free.
89*da0073e9SAndroid Build Coastguard Worker   virtual void free(void* ptr);
90*da0073e9SAndroid Build Coastguard Worker };
91*da0073e9SAndroid Build Coastguard Worker 
92*da0073e9SAndroid Build Coastguard Worker CPUCachingAllocator* GetDefaultCPUCachingAllocator();
93*da0073e9SAndroid Build Coastguard Worker 
94*da0073e9SAndroid Build Coastguard Worker bool ThreadLocalCachingAllocatorEnabled();
95*da0073e9SAndroid Build Coastguard Worker CPUCachingAllocator* GetThreadLocalCachingAllocator();
96*da0073e9SAndroid Build Coastguard Worker 
97*da0073e9SAndroid Build Coastguard Worker class C10_API WithCPUCachingAllocatorGuard {
98*da0073e9SAndroid Build Coastguard Worker  public:
99*da0073e9SAndroid Build Coastguard Worker   WithCPUCachingAllocatorGuard(CPUCachingAllocator* allocator);
100*da0073e9SAndroid Build Coastguard Worker   ~WithCPUCachingAllocatorGuard();
101*da0073e9SAndroid Build Coastguard Worker 
102*da0073e9SAndroid Build Coastguard Worker  private:
103*da0073e9SAndroid Build Coastguard Worker   CPUCachingAllocator* prev_caching_allocator_ptr_{nullptr};
104*da0073e9SAndroid Build Coastguard Worker };
105*da0073e9SAndroid Build Coastguard Worker 
106*da0073e9SAndroid Build Coastguard Worker } // namespace c10
107