1 /*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9 #include <executorch/backends/xnnpack/runtime/profiling/XNNProfiler.h>
10 #include <executorch/runtime/core/error.h>
11 #include <executorch/runtime/core/event_tracer.h>
12
13 // Headers used only when event tracer is compiled in.
14 // NOLINTBEGIN
15 #include <executorch/runtime/platform/log.h>
16 #include <executorch/runtime/platform/platform.h>
17 #include <executorch/runtime/platform/types.h>
18
19 #include <cinttypes>
20 #include <cstring>
21 #include <string>
22 #include <unordered_map>
23 // NOLINTEND
24
25 namespace executorch::backends::xnnpack::delegate::profiling {
26
27 using executorch::runtime::Error;
28 using executorch::runtime::EventTracer;
29
30 #if defined(ET_EVENT_TRACER_ENABLED) || defined(ENABLE_XNNPACK_PROFILING)
31
XNNProfiler()32 XNNProfiler::XNNProfiler()
33 : state_(XNNProfilerState::Uninitialized), run_count_(0) {}
34
initialize(xnn_runtime_t runtime)35 Error XNNProfiler::initialize(xnn_runtime_t runtime) {
36 runtime_ = runtime;
37
38 // Fetch the runtime operator information from XNNPACK.
39 ET_CHECK_OK_OR_RETURN_ERROR(get_runtime_num_operators());
40 ET_CHECK_OK_OR_RETURN_ERROR(get_runtime_operator_names());
41
42 state_ = XNNProfilerState::Ready;
43
44 return Error::Ok;
45 }
46
start(EventTracer * event_tracer)47 Error XNNProfiler::start(EventTracer* event_tracer) {
48 // Validate profiler state.
49 if (state_ == XNNProfilerState::Uninitialized) {
50 ET_LOG(
51 Error,
52 "XNNProfiler must be initialized prior to calling begin_execution.");
53 return Error::InvalidState;
54 } else if (state_ == XNNProfilerState::Running) {
55 ET_LOG(
56 Error,
57 "XNNProfiler is already running. Call end_execution() before calling begin_execution().");
58 return Error::InvalidState;
59 }
60
61 event_tracer_ = event_tracer;
62 state_ = XNNProfilerState::Running;
63
64 // Log the start of execution timestamp.
65 start_time_ = et_pal_current_ticks();
66
67 return Error::Ok;
68 }
69
end()70 Error XNNProfiler::end() {
71 // Validate profiler state.
72 ET_CHECK_OR_RETURN_ERROR(
73 state_ == XNNProfilerState::Running,
74 InvalidState,
75 "XNNProfiler is not running. Ensure begin_execution() is called before end_execution().");
76
77 // Retrieve operator timing from XNNPACK.
78 ET_CHECK_OK_OR_RETURN_ERROR(get_runtime_operator_timings());
79
80 if (event_tracer_ != nullptr) {
81 submit_trace();
82 }
83
84 log_operator_timings();
85
86 state_ = XNNProfilerState::Ready;
87 return Error::Ok;
88 }
89
get_runtime_operator_names()90 Error XNNProfiler::get_runtime_operator_names() {
91 size_t required_size = 0;
92
93 // First call returns xnn_status_out_of_memory, but sets required_size to
94 // the correct size of the buffer to store the result.
95 xnn_status status = xnn_get_runtime_profiling_info(
96 runtime_, // runtime
97 xnn_profile_info_operator_name, // param_name
98 0, // param_value_size
99 nullptr, // param_value
100 &required_size // param_value_size_ret
101 );
102
103 if (status == xnn_status_out_of_memory) {
104 op_names_.resize(required_size);
105 status = xnn_get_runtime_profiling_info(
106 runtime_,
107 xnn_profile_info_operator_name,
108 op_names_.size(),
109 op_names_.data(),
110 &required_size);
111 }
112
113 if (status != xnn_status_success) {
114 ET_LOG(Error, "Failed to get XNNPACK operator names: %d", status);
115 return Error::Internal;
116 }
117
118 return Error::Ok;
119 }
120
get_runtime_num_operators()121 Error XNNProfiler::get_runtime_num_operators() {
122 size_t required_size = 0;
123
124 xnn_status status = xnn_get_runtime_profiling_info(
125 runtime_,
126 xnn_profile_info_num_operators,
127 sizeof(op_count_),
128 &op_count_,
129 &required_size);
130
131 if (status != xnn_status_success) {
132 ET_LOG(Error, "Failed to get XNNPACK operator count: %d", status);
133 return Error::Internal;
134 }
135
136 return Error::Ok;
137 }
138
get_runtime_operator_timings()139 Error XNNProfiler::get_runtime_operator_timings() {
140 size_t required_size;
141
142 // Get number of runtime operators for timing_stats.size
143 op_timings_.resize(op_count_);
144 xnn_status status = xnn_get_runtime_profiling_info(
145 runtime_,
146 xnn_profile_info_operator_timing,
147 op_timings_.size() * sizeof(uint64_t),
148 op_timings_.data(),
149 &required_size);
150
151 if (status != xnn_status_success) {
152 ET_LOG(Error, "Failed to get XNNPACK operator timing: %d", status);
153 return Error::Internal;
154 }
155
156 return Error::Ok;
157 }
158
log_operator_timings()159 void XNNProfiler::log_operator_timings() {
160 #ifdef ENABLE_XNNPACK_PROFILING
161 // Update running average state and log average timing for each op.
162 run_count_++;
163 size_t name_len = 0;
164 const char* op_name = nullptr;
165 auto total_time = 0.0f;
166
167 if (op_timings_sum_.size() != op_count_) {
168 op_timings_sum_ = std::vector<uint64_t>(op_count_, 0);
169 }
170
171 for (size_t i = 0; i < op_count_; i++) {
172 op_name = &op_names_[name_len];
173 name_len += strlen(op_name) + 1;
174
175 op_timings_sum_[i] += op_timings_[i];
176 auto avg_op_time = op_timings_sum_[i] / static_cast<float>(run_count_);
177 total_time += avg_op_time;
178
179 ET_LOG(
180 Info, ">>, %s, %" PRId64 " (%f)", op_name, op_timings_[i], avg_op_time);
181 }
182 ET_LOG(Info, ">>, Total Time, %f", total_time);
183 #else
184 run_count_++;
185 #endif
186 }
187
submit_trace()188 void XNNProfiler::submit_trace() {
189 // Retrieve the system tick rate (ratio between ticks and nanoseconds).
190 auto tick_ns_conv_multiplier = et_pal_ticks_to_ns_multiplier();
191
192 ET_CHECK(op_timings_.size() == op_count_);
193 size_t name_len = 0;
194 et_timestamp_t time = start_time_;
195 std::unordered_map<std::string, uint32_t> op_counts;
196
197 for (auto i = 0u; i < op_count_; i++) {
198 auto op_name = &op_names_[name_len];
199 name_len += strlen(op_name) + 1;
200
201 // Format the op name as {name} #{count}.
202 auto op_name_str = std::string(op_name);
203 op_counts[op_name_str]++;
204 auto name_formatted =
205 op_name_str + " #" + std::to_string(op_counts[op_name_str]);
206
207 // Convert from microseconds (XNNPACK) to PAL ticks (ET).
208 // The tick_ns_conv_ratio is ns / tick. We want ticks:
209 // ticks = us * (ns / us) / conv_ratio
210 // = us * 1000 * conv_ratio.denom / conv_ratio.num
211 auto interval_ticks = static_cast<et_timestamp_t>(
212 op_timings_[i] * 1000 * tick_ns_conv_multiplier.denominator /
213 tick_ns_conv_multiplier.numerator);
214
215 auto end_time = time + interval_ticks;
216
217 executorch::runtime::event_tracer_log_profiling_delegate(
218 event_tracer_,
219 name_formatted.c_str(),
220 /*delegate_debug_id=*/static_cast<executorch::runtime::DebugHandle>(-1),
221 time,
222 end_time);
223
224 // Assume that the next op starts immediately after the previous op.
225 // This may not be strictly true, but it should be close enough.
226 // Ideally, we'll get the start and end times from XNNPACK in the
227 // future.
228 time = end_time;
229 }
230 }
231
232 #else // defined(ET_EVENT_TRACER_ENABLED) || defined(ENABLE_XNNPACK_PROFILING)
233
234 // Stub implementation for when profiling is disabled.
XNNProfiler()235 XNNProfiler::XNNProfiler() {}
236
initialize(xnn_runtime_t runtime)237 Error XNNProfiler::initialize(xnn_runtime_t runtime) {
238 (void)runtime;
239 return Error::Ok;
240 }
241
start(EventTracer * event_tracer)242 Error XNNProfiler::start(EventTracer* event_tracer) {
243 (void)event_tracer;
244 return Error::Ok;
245 }
246
end()247 Error XNNProfiler::end() {
248 return Error::Ok;
249 }
250
251 #endif
252
253 } // namespace executorch::backends::xnnpack::delegate::profiling
254