xref: /aosp_15_r20/external/pytorch/c10/mobile/CPUProfilingAllocator.h (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1 #pragma once
2 
3 #include <c10/macros/Export.h>
4 #include <c10/util/flat_hash_map.h>
5 #include <cstddef>
6 #include <cstdint>
7 #include <memory>
8 #include <vector>
9 
10 namespace c10 {
11 
12 /*
13  * Given a sequence of allocations in a thread, AllocationPlan records
14  * 1. size of each allocation
15  * 2. Lifetime of each allocation.
16  * 3. allocation offsets: Memory offset for each allocation in a single blob of
17  * memory
18  * 4. Total size of a blob of memory required to satisfy all the allocations.
19  */
20 class C10_API AllocationPlan {
21  private:
22   // Records size of each allocation by their sequential allocation ids.
23   std::vector<uint64_t> allocation_sizes;
24   // This maps one allocation id (X) to another allocation id (Y).
25   // Allocation X is alive until allocation Y. From allocation Y onwards
26   // allocation X is not referenced.
27   // Thus Y is the id of the first allocation after X is freed.
28   // NB: When an allocation is recorded, along with recording its size,
29   // we also set the lifetime to be numeric_limits::max()
30   // This is to track allocations that are made during the scope of
31   // profiling but were not freed until after the scope ended.
32   // Such allocations are not managed by profiling allocator.
33   std::vector<uint64_t> allocation_lifetimes;
34   // Maps an allocation to some offset in a blob of memory.
35   std::vector<uint64_t> allocation_offsets;
36   uint64_t total_size{0};
37   void clear();
38   friend class AllocationPlanner;
39   friend class CPUProfilingAllocator;
40 };
41 
42 /*
43  * Map of memory ptr to allocation id. This is auxiliary information only
44  * used to establish lifetime of allocations.
45  */
46 class C10_API AllocationPlanner {
47  private:
48   AllocationPlan* allocation_plan_{nullptr};
49   // Maps allocated ptr to its allocation id.
50   // This is used when freeing the memory to look up the allocation id
51   // in order to establish the lifetime of a particular allocation.
52   ska::flat_hash_map<const void*, uint64_t> allocation_ptr_to_id_;
53   uint64_t allocation_id_{0};
54   bool validation_mode_{false};
55 
56   bool validate_allocation(const uint64_t size, const void* ptr);
57   bool validate_free(const void* ptr);
58 
59  public:
60   bool validation_success{true};
61 
62   AllocationPlanner() = delete;
63   AllocationPlanner(AllocationPlan* plan, bool validate = false)
allocation_plan_(plan)64       : allocation_plan_(plan), validation_mode_(validate) {}
65   void record_allocation(const uint64_t size, const void* ptr);
66   void record_free(const void* ptr);
67   void formulate_plan();
68   void clear();
69 };
70 
71 // NOT THREAD SAFE profiling allocator.
72 class C10_API CPUProfilingAllocator {
73  private:
74   const AllocationPlan* plan_{nullptr};
75   uint64_t allocation_id_{0};
76   uint64_t current_size_{0};
77   void* blob_{nullptr};
78   ska::flat_hash_map<const void*, uint64_t> allocation_ptr_to_id_;
79 
80  public:
81   ~CPUProfilingAllocator();
82   void set_plan(const AllocationPlan* plan);
83   void unset_plan();
84   void* allocate(const size_t bytes);
85   void free(void* const ptr);
86 };
87 
88 /*
89  * Usage: Profile allocations made by one run of the model.
90  * AllocationPlan plan;
91  * {
92  *   WithProfileAllocationGuard profile_guard(&plan);
93  *   module.forward(...);
94  * }
95  * plan now contains allocation plan.
96  */
97 class C10_API WithProfileAllocationsGuard {
98  public:
99   WithProfileAllocationsGuard(AllocationPlan* plan);
100   ~WithProfileAllocationsGuard();
101 
102  private:
103   std::unique_ptr<AllocationPlanner> planner_;
104 };
105 
106 /*
107  * Usage: Validate allocation plan made with WithProfileAllocationGuard
108  * bool plan_validation_success, success = true;
109  * for (some number of representative inputs)
110  * {
111  *   WithValidateAllocationPlanGuard(&plan, &plan_validation_success);
112  *   module.forward(...);
113  *   success = success && plan_validation_success;
114  * }
115  * success == true means allocations are according to plan
116  * else for some inputs allocation pattern changed.
117  */
118 class C10_API WithValidateAllocationPlanGuard {
119  public:
120   WithValidateAllocationPlanGuard(AllocationPlan* plan, bool* success);
121   ~WithValidateAllocationPlanGuard();
122 
123  private:
124   std::unique_ptr<AllocationPlanner> planner_;
125   bool* success_;
126 };
127 
// Accessor for the calling thread's AllocationPlanner.
// NOTE(review): presumably returns nullptr when no profiling/validation
// guard is active on this thread -- confirm against the implementation.
128 AllocationPlanner* GetThreadLocalAllocationPlanner();
129 
130 /*
131  * Usage: Allocate tensors accordingly to allocation plan
132  * First make allocation plan.
133  *  See WithProfileAllocationsGuard usage.
134  * Second validate allocation plan.
135  *  See WithValidateAllocationPlanGuard usage.
136  * CPUProfilingAllocator profiling_allocator;
137  * {
138  *   WithProfilingAllocatorGuard allocator_guard(&profiling_allocator, &plan);
139  *   module.forward(...);
140  * }
141  */
142 class C10_API WithProfilingAllocatorGuard {
143  public:
144   WithProfilingAllocatorGuard(
145       CPUProfilingAllocator* allocator,
146       const AllocationPlan* plan);
147   ~WithProfilingAllocatorGuard();
148 };
149 
// Accessor for the calling thread's CPUProfilingAllocator.
// NOTE(review): presumably returns nullptr when no
// WithProfilingAllocatorGuard is active on this thread -- confirm against
// the implementation.
150 CPUProfilingAllocator* GetThreadLocalProfilingAllocator();
151 
152 } // namespace c10
153