xref: /aosp_15_r20/external/pytorch/torch/csrc/jit/mobile/profiler_edge.cpp (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
#include <c10/core/Allocator.h>
#include <c10/util/Exception.h>
#include <c10/util/overloaded.h>
#include <torch/csrc/jit/mobile/profiler_edge.h>
#include <string>
#include <vector>

namespace torch::jit::mobile {

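// The edge profiler active on the current thread, if any. Kept thread-local
// so that at most one KinetoEdgeCPUProfiler profiles a given thread at a time.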
thread_local KinetoEdgeCPUProfiler* tls_edge_profiler{nullptr};

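// Sets up a Kineto CPU profiler scoped to lite-interpreter ops: builds the
// profiler config from the constructor flags, optionally installs a
// post-processing step that maps debug handles to module hierarchy or call
// stacks, and registers this instance as the thread-local edge profiler.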
KinetoEdgeCPUProfiler::KinetoEdgeCPUProfiler(
    const torch::jit::mobile::Module& m,
    const std::string& fname,
    const bool report_input_shapes,
    const bool profile_memory,
    const bool with_stack,
    const bool with_flops,
    const bool with_modules,
    std::vector<std::string> events,
    const bool adjust_vulkan_timestamps)
    : m_(m), trace_file_name_(fname) {
  torch::profiler::impl::ExperimentalConfig experimental_config;
  // Enable hardware performance counters for the requested events
  if (!events.empty()) {
    experimental_config.performance_events = std::move(events);
  }

  // Adjust Vulkan timestamps from the query pool to align with CPU event times
  experimental_config.adjust_timestamps = adjust_vulkan_timestamps;

  torch::profiler::impl::ProfilerConfig config(
      torch::profiler::impl::ProfilerState::KINETO,
      report_input_shapes,
      profile_memory,
      with_stack,
      with_flops,
      with_modules,
      experimental_config);
  torch::autograd::profiler::prepareProfiler(
      config, {torch::autograd::profiler::ActivityType::CPU});
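  // When stack traces or module hierarchy are requested, post-process each
  // profiled event: translate its debug handle back into the source-level
  // information recorded at model save time (if the model carries debug
  // handles at all).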
  if (with_modules || with_stack) {
    auto post_processing = [this, with_stack, with_modules](
                               int64_t debug_handle,
                               std::vector<std::string>& jit_stack,
                               std::vector<std::string>& jit_modules) {
      std::string no_debug_info("Model was not saved with debug information");
      if (with_modules) {
        // Since KinetoEvent's module hierarchy takes a vector of strings,
        // construct a temporary vector holding a single string element
        jit_modules = std::vector<std::string>(
            {this->m_.hasDebugHandles()
                 ? this->m_.getModuleHierarchy(debug_handle)
                 : no_debug_info});
      } else if (with_stack) {
        // Since KinetoEvent's stack trace takes a vector of strings,
        // construct a temporary vector holding a single string element
        jit_stack = std::vector<std::string>(
            {this->m_.hasDebugHandles() ? this->m_.getCallStack(debug_handle)
                                        : no_debug_info});
      }
    };
    torch::autograd::profiler::enableProfilerWithEventPostProcess(
        config,
        {torch::autograd::profiler::ActivityType::CPU},
        post_processing,
        {at::RecordScope::LITE_INTERPRETER});
  } else {
    torch::autograd::profiler::enableProfiler(
        config,
        {torch::autograd::profiler::ActivityType::CPU},
        {at::RecordScope::LITE_INTERPRETER});
  }
  TORCH_CHECK(
      tls_edge_profiler == nullptr, "Edge profiler is already profiling.");
  tls_edge_profiler = this;
}

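// Forwards an allocation or free reported by a backend to the active
// profiler's memory tracking (by convention, a negative alloc_size
// denotes a free).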
void KinetoEdgeCPUProfiler::recordBackendMemoryEvent(
    void* ptr,
    int64_t alloc_size,
    size_t total_allocated,
    size_t total_reserved,
    c10::Device device) {
  c10::reportMemoryUsageToProfiler(
      ptr, alloc_size, total_allocated, total_reserved, device);
}

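// Records an event whose timing was measured by a delegate backend, tagging
// it with a debug handle so it can be correlated back to the model source.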
void KinetoEdgeCPUProfiler::recordBackendEvent(
    const int64_t start_time_us,
    const int64_t end_time_us,
    const int64_t debug_handle,
    const std::string& event_name,
    const std::string& backend_name) {
  torch::autograd::profiler::reportBackendEventToActiveKinetoProfiler(
      start_time_us,
      end_time_us,
      debug_handle,
      at::RecordScope::LITE_INTERPRETER,
      event_name,
      backend_name);
}

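// Stops profiling and caches the result; may be called at most once per
// instance. Use getProfilerResult() to retrieve the cached result afterwards.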
const std::unique_ptr<torch::autograd::profiler::ProfilerResult>&
KinetoEdgeCPUProfiler::disableProfiler() {
  TORCH_CHECK(
      !profiler_result_,
      "KinetoEdgeCPUProfiler already disabled. "
      "To get the list of events use getProfilerResult()");
  profiler_result_ = torch::autograd::profiler::disableProfiler();
  return profiler_result_;
}

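// Returns the cached result of a completed profiling run; valid only after
// disableProfiler() has been called.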
const std::unique_ptr<torch::autograd::profiler::ProfilerResult>&
KinetoEdgeCPUProfiler::getProfilerResult() {
  TORCH_CHECK(
      profiler_result_,
      "KinetoEdgeCPUProfiler has not been disabled. "
      "Use the disableProfiler() API first, which returns the ProfilerResult.");
  return profiler_result_;
}

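// On destruction, saves the trace to trace_file_name_ (disabling the
// profiler first if the caller never did) and clears the thread-local slot.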
KinetoEdgeCPUProfiler::~KinetoEdgeCPUProfiler() {
  if (!trace_file_name_.empty()) {
    if (profiler_result_) {
      profiler_result_->save(trace_file_name_);
    } else {
      torch::autograd::profiler::disableProfiler()->save(trace_file_name_);
    }
  }
  tls_edge_profiler = nullptr;
}

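// Returns the edge profiler active on the calling thread, or nullptr if none.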
KinetoEdgeCPUProfiler* getCurrentEdgeProfiler() {
  return tls_edge_profiler;
}

} // namespace torch::jit::mobile
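
// A minimal usage sketch (not part of the original file), assuming a model
// saved for the lite interpreter as "model.ptl" with a standard forward()
// method; the file name and tensor shape below are hypothetical. The
// profiler is RAII-style: profiling covers the lifetime of the
// KinetoEdgeCPUProfiler object, and the destructor writes the trace if the
// caller never saved it explicitly.
//
//   #include <ATen/ATen.h>
//   #include <torch/csrc/jit/mobile/import.h>
//   #include <torch/csrc/jit/mobile/profiler_edge.h>
//
//   void profile_one_inference() {
//     auto m = torch::jit::mobile::_load_for_mobile("model.ptl");
//     {
//       torch::jit::mobile::KinetoEdgeCPUProfiler profiler(
//           m,
//           "trace.json", // Chrome-trace output path
//           /*report_input_shapes=*/true,
//           /*profile_memory=*/false,
//           /*with_stack=*/true,
//           /*with_flops=*/false,
//           /*with_modules=*/false,
//           /*events=*/{},
//           /*adjust_vulkan_timestamps=*/false);
//       m.forward({at::ones({1, 3, 224, 224})});
//     } // ~KinetoEdgeCPUProfiler() disables the profiler and saves "trace.json"
//   }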