xref: /aosp_15_r20/external/pytorch/c10/mobile/CPUCachingAllocator.h (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1 #pragma once
2 
3 #include <cstddef>
4 #include <mutex>
5 
6 #include <c10/macros/Export.h>
7 #include <c10/util/SmallVector.h>
8 #include <c10/util/flat_hash_map.h>
9 
/*
 * CPUCachingAllocator:
 * DISCLAIMER:
 *    This is subject to change (beta) and is only supported on mobile builds.
 *    If a code snippet such as the one in 'Usage pattern' is used outside of
 *    a mobile build, you will not observe the intended behavior.
 *    See below for more information.
 * Why?
 *    It has been observed that some mobile platforms, such as Pixel 3, return
 *    memory aggressively to the system. This results in page faults in some
 *    cases and ends up hurting performance. This caching allocator aims to
 *    address that. Furthermore, it also allows users to specify their own
 *    allocator by implementing the allocate/free virtual interfaces.
 * What are the cons?
 *    There are some cons that were observed where use of the caching
 *    allocator led to worse performance on some platforms. The reason is that
 *    the caching mechanism used by this allocator left us worse off compared
 *    to the corresponding platform's tuned memory allocator. In that case it
 *    seemed better to not use this allocator. Note that there are some ideas
 *    to fix this in the works.
 *
 * Usage pattern:
 *    Instantiate and own the caching allocator.
 *      std::unique_ptr<c10::CPUCachingAllocator> caching_allocator =
 *        std::make_unique<c10::CPUCachingAllocator>();
 *    Use the caching allocator with a scoped guard at inference time.
 *    The guard must be a named object: an unnamed temporary would be
 *    destroyed at the end of its own statement, before model.forward() runs.
 *      {
 *        c10::WithCPUCachingAllocatorGuard guard(caching_allocator.get());
 *        ... model.forward(...);
 *      }
 */
40 
41 namespace c10 {
42 
43 class C10_API CPUCachingAllocator {
44   /*
45    * What it does:
46    * Caches all the allocations carried out by this allocator.
47    * Cache key is the size of the allocation.
48    * If requested size is found in the cache returns the cached pointer.
49    * What it does not do:
50    * No speculative allocation for any future allocations.
51    */
52  private:
53   inline void* allocate_and_cache(const size_t bytes);
54   void free_cached();
55 
56  protected:
57   // Invariants.
58   // 1. If memory is ever allocated via this allocator then
59   //    the pointer will exist in allocation_map_, unless the allocator
60   //    returned the memory to OS via free_cached.
61   //  1.1. Therefore even when the said memory is "freed" via this
62   //       allocator (and thus cached), it will continue to stay
63   //       in allocation_map_. Furthermore it will also exist in
64   //       available_map_. Thus an allocated memory pointer can be in both
65   //       allocation_map_ and available_map_ simultaneously.
66   // 2. Memory pointer maybe removed from allocation_map_, when it
67   //    is freed outside of the scope of this allocator, but was allocated
68   //    by this allocator.
69   // 3. Available map only contains that memory which was allocated
70   //    by this allocator and subsequently freed by this allocator.
71   // As a result of above invariants, allocated memory ptr cannot be in
72   // available_map_ unless it is in allocation_map_ as well.
73   ska::flat_hash_map<size_t, c10::SmallVector<void*, 16>> available_map_;
74   static ska::flat_hash_map<void*, size_t> allocation_map_;
75   // Since allocation_map, which is a global instance, is mutated/read via
76   // all public APIs we need a global mutex.
77   static std::mutex mutex_;
78 
79  public:
80   static void record_free(void* ptr);
81   virtual ~CPUCachingAllocator();
82   // Checks the cache to see if allocation of size bytes can be found.
83   // If so return cached memory, else
84   // allocates memory, records it for caching and returns.
85   virtual void* allocate(const size_t bytes);
86   // Checks if the memory being freed is was marked for allocation by
87   // an earlier call to allocate. If so cache the allocation.
88   // Otherwise free.
89   virtual void free(void* ptr);
90 };
91 
// Accessor for a default CPUCachingAllocator instance.
// NOTE(review): the definition is not visible from this header; who owns the
// returned pointer and whether it can be null should be confirmed there.
92 CPUCachingAllocator* GetDefaultCPUCachingAllocator();
93 
// NOTE(review): going by the names, these presumably report whether a
// caching allocator is currently set for the calling thread and return that
// allocator. Confirm against the definitions, including what
// GetThreadLocalCachingAllocator returns when none is set.
94 bool ThreadLocalCachingAllocatorEnabled();
95 CPUCachingAllocator* GetThreadLocalCachingAllocator();
96 
97 class C10_API WithCPUCachingAllocatorGuard {
98  public:
99   WithCPUCachingAllocatorGuard(CPUCachingAllocator* allocator);
100   ~WithCPUCachingAllocatorGuard();
101 
102  private:
103   CPUCachingAllocator* prev_caching_allocator_ptr_{nullptr};
104 };
105 
106 } // namespace c10
107