1 #pragma once 2 3 #include <c10/macros/Export.h> 4 #include <c10/util/flat_hash_map.h> 5 #include <cstddef> 6 #include <cstdint> 7 #include <memory> 8 #include <vector> 9 10 namespace c10 { 11 12 /* 13 * Given a sequence of allocations in a thread, AllocationPlan records 14 * 1. size of each allocation 15 * 2. Lifetime of each allocation. 16 * 3. allocation offsets: Memory offset for each allocation in a single blob of 17 * memory 18 * 4. Total size of a blob of memory required to satisfy all the allocations. 19 */ 20 class C10_API AllocationPlan { 21 private: 22 // Records size of each allocation by their sequential allocation ids. 23 std::vector<uint64_t> allocation_sizes; 24 // This maps one allocation id (X) to another allocation id (Y). 25 // Allocation X is alive until allocation Y. From allocation Y onwards 26 // allocation X is not referenced. 27 // Thus Y is the id of the first allocation after X is freed. 28 // NB: When an allocation is recorded, along with recording its size, 29 // we also set the lifetime to be numeric_limits::max() 30 // This is to track allocations that are made during the scope of 31 // profiling but were not freed until after the scope ended. 32 // Such allocations are not managed by profiling allocator. 33 std::vector<uint64_t> allocation_lifetimes; 34 // Maps an allocation to some offset in a blob of memory. 35 std::vector<uint64_t> allocation_offsets; 36 uint64_t total_size{0}; 37 void clear(); 38 friend class AllocationPlanner; 39 friend class CPUProfilingAllocator; 40 }; 41 42 /* 43 * Map of memory ptr to allocation id. This is auxiliary information only 44 * used to establish lifetime of allocations. 45 */ 46 class C10_API AllocationPlanner { 47 private: 48 AllocationPlan* allocation_plan_{nullptr}; 49 // Maps allocated ptr to its allocation id. 50 // This is used when freeing the memory to look up the allocation id 51 // in order to establish the lifetime of a particular allocation. 52 ska::flat_hash_map<const void*, uint64_t> allocation_ptr_to_id_; 53 uint64_t allocation_id_{0}; 54 bool validation_mode_{false}; 55 56 bool validate_allocation(const uint64_t size, const void* ptr); 57 bool validate_free(const void* ptr); 58 59 public: 60 bool validation_success{true}; 61 62 AllocationPlanner() = delete; 63 AllocationPlanner(AllocationPlan* plan, bool validate = false) allocation_plan_(plan)64 : allocation_plan_(plan), validation_mode_(validate) {} 65 void record_allocation(const uint64_t size, const void* ptr); 66 void record_free(const void* ptr); 67 void formulate_plan(); 68 void clear(); 69 }; 70 71 // NOT THREAD SAFE profiling allocator. 72 class C10_API CPUProfilingAllocator { 73 private: 74 const AllocationPlan* plan_{nullptr}; 75 uint64_t allocation_id_{0}; 76 uint64_t current_size_{0}; 77 void* blob_{nullptr}; 78 ska::flat_hash_map<const void*, uint64_t> allocation_ptr_to_id_; 79 80 public: 81 ~CPUProfilingAllocator(); 82 void set_plan(const AllocationPlan* plan); 83 void unset_plan(); 84 void* allocate(const size_t bytes); 85 void free(void* const ptr); 86 }; 87 88 /* 89 * Usage: Profile allocations made by one run of the model. 90 * AllocationPlan plan; 91 * { 92 * WithProfileAllocationGuard profile_guard(&plan); 93 * module.forward(...); 94 * } 95 * plan now contains allocation plan. 96 */ 97 class C10_API WithProfileAllocationsGuard { 98 public: 99 WithProfileAllocationsGuard(AllocationPlan* plan); 100 ~WithProfileAllocationsGuard(); 101 102 private: 103 std::unique_ptr<AllocationPlanner> planner_; 104 }; 105 106 /* 107 * Usage: Validate allocation plan made with WithProfileAllocationGuard 108 * bool plan_validation_success, success = true; 109 * for (some number of representative inputs) 110 * { 111 * WithValidateAllocationPlanGuard(&plan, &plan_validation_success); 112 * module.forward(...); 113 * success = success && plan_validation_success; 114 * } 115 * success == true means allocations are according to plan 116 * else for some inputs allocation pattern changed. 117 */ 118 class C10_API WithValidateAllocationPlanGuard { 119 public: 120 WithValidateAllocationPlanGuard(AllocationPlan* plan, bool* success); 121 ~WithValidateAllocationPlanGuard(); 122 123 private: 124 std::unique_ptr<AllocationPlanner> planner_; 125 bool* success_; 126 }; 127 128 AllocationPlanner* GetThreadLocalAllocationPlanner(); 129 130 /* 131 * Usage: Allocate tensors accordingly to allocation plan 132 * First make allocation plan. 133 * See WithProfileAllocationsGuard usage. 134 * Second validate allocation plan. 135 * See WithValidateAllocationPlanGuard usage. 136 * CPUProfilingAllocator profiling_allocator; 137 * { 138 * WithProfilingAllocatorGuard allocator_guard(&profiling_allocator, &plan); 139 * module.forward(...); 140 * } 141 */ 142 class C10_API WithProfilingAllocatorGuard { 143 public: 144 WithProfilingAllocatorGuard( 145 CPUProfilingAllocator* allocator, 146 const AllocationPlan* plan); 147 ~WithProfilingAllocatorGuard(); 148 }; 149 150 CPUProfilingAllocator* GetThreadLocalProfilingAllocator(); 151 152 } // namespace c10 153