1 /*
2 * Copyright (c) Qualcomm Innovation Center, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9 #include <executorch/backends/qualcomm/aot/wrappers/TensorWrapper.h>
10 #include <executorch/backends/qualcomm/qc_compiler_spec_generated.h>
11 #include <executorch/backends/qualcomm/runtime/QnnExecuTorchBackend.h>
12 #include <executorch/backends/qualcomm/runtime/QnnManager.h>
13
14 namespace executorch {
15 namespace backends {
16 namespace qnn {
17
18 using namespace qnn_delegate;
19 using executorch::runtime::ArrayRef;
20 using executorch::runtime::BackendExecutionContext;
21 using executorch::runtime::BackendInitContext;
22 using executorch::runtime::CompileSpec;
23 using executorch::runtime::DelegateHandle;
24 using executorch::runtime::EValue;
25 using executorch::runtime::FreeableBuffer;
26 using executorch::runtime::MemoryAllocator;
27 using executorch::runtime::Result;
28
29 // ========== Public method implementations =========================
// Key of the CompileSpec entry whose value buffer is parsed into the QNN
// ExecuTorch options in init().
constexpr auto QNN_COMPILE_SPEC = "qnn_compile_spec";
init(BackendInitContext & context,FreeableBuffer * processed,ArrayRef<CompileSpec> compile_specs) const31 Result<DelegateHandle*> QnnExecuTorchBackend::init(
32 BackendInitContext& context,
33 FreeableBuffer* processed,
34 ArrayRef<CompileSpec> compile_specs) const {
35 // covert SizedBuffer to qnn ExecuTorch option
36 QnnExecuTorchContextBinary qnn_context_blob;
37 const qnn_delegate::QnnExecuTorchOptions* qnn_executorch_options = nullptr;
38
39 qnn_context_blob.buffer = const_cast<void*>(processed->data());
40 qnn_context_blob.nbytes = processed->size();
41
42 // convert CompileSpec to qnn ExecuTorch option
43 for (auto& compile_spec : compile_specs) {
44 if (std::strcmp(compile_spec.key, QNN_COMPILE_SPEC) == 0)
45 qnn_executorch_options =
46 GetQnnExecuTorchOptions(compile_spec.value.buffer);
47 else
48 QNN_EXECUTORCH_LOG_WARN("unknown argument: %s", compile_spec.key);
49 }
50
51 // Create QnnManager
52 MemoryAllocator* runtime_allocator = context.get_runtime_allocator();
53 QnnManager* qnn_manager =
54 ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(runtime_allocator, QnnManager);
55
56 // NOTE: Since we use placement new and since this type is not trivially
57 // destructible, we must call the destructor manually in destroy().
58 new (qnn_manager) QnnManager(qnn_executorch_options, qnn_context_blob);
59
60 // TODO: this is a temporal solution for multi-graph support, will be
61 // removed once framework starts to accept runtime configuration
62 // ---
63 // check if current context binary has already been initialized
64 // return cached one for reducing memory footprint
65 std::string signature = qnn_manager->GetBinarySignature();
66 auto iter = delegate_map_.find(signature);
67 if (iter != delegate_map_.end()) {
68 QNN_EXECUTORCH_LOG_INFO(
69 "Use cached delegate handle for current method: %s",
70 context.get_method_name());
71 return iter->second;
72 }
73
74 ET_CHECK_OR_RETURN_ERROR(
75 qnn_manager->Init() == Error::Ok,
76 Internal,
77 "Fail to initialize Qnn Manager");
78
79 if (qnn_manager->IsOnlinePrepare()) {
80 ET_CHECK_OR_RETURN_ERROR(
81 qnn_manager->CompileQcir() == Error::Ok,
82 Internal,
83 "Fail to compile binary in qcir format");
84 } else {
85 for (const std::string& graph_name : qnn_manager->GetGraphNames()) {
86 ET_CHECK_OR_RETURN_ERROR(
87 qnn_manager->AllocateTensor(graph_name) == Error::Ok,
88 Internal,
89 "Fail to allocate tensor");
90 }
91 }
92 add_cached_delegate(signature, qnn_manager);
93 // This backend does not need its processed data after Init.
94 processed->Free();
95 return qnn_manager;
96 }
97
execute(BackendExecutionContext & context,DelegateHandle * handle,EValue ** args) const98 Error QnnExecuTorchBackend::execute(
99 BackendExecutionContext& context,
100 DelegateHandle* handle,
101 EValue** args) const {
102 ET_CHECK_OR_RETURN_ERROR(
103 delegate_map_rev_.count(handle) != 0,
104 Internal,
105 "DelegateHandle has been deleted");
106 QnnManager* qnn_manager = static_cast<QnnManager*>(handle);
107
108 std::string method_name = context.get_method_name();
109 std::vector<std::shared_ptr<TensorWrapper>> input_tensors =
110 qnn_manager->GetGraphInputs(method_name);
111 std::vector<std::shared_ptr<TensorWrapper>> output_tensors =
112 qnn_manager->GetGraphOutputs(method_name);
113 std::vector<Qnn_Tensor_t> input_tensor_structs;
114 std::vector<Qnn_Tensor_t> output_tensor_structs;
115
116 input_tensor_structs.reserve(input_tensors.size());
117 for (int i = 0; i < input_tensors.size(); ++i) {
118 if (qnn_manager->RegisterMem(
119 args[i]->toTensor().mutable_data_ptr(), input_tensors[i]) !=
120 Error::Ok) {
121 // update data ptr only should be fine
122 input_tensors[i]->FillDataBuffer(
123 args[i]->toTensor().const_data_ptr(), false /* copy_data */);
124 }
125 input_tensor_structs.push_back(input_tensors[i]->CloneTensorStruct());
126 }
127
128 int output_index = input_tensors.size();
129 for (const auto& output_tensor : output_tensors) {
130 // pos=0 limits the search to the prefix
131 if (output_tensor->GetName().rfind("output_", 0) == 0) {
132 void* mutable_data_ptr =
133 args[output_index]->toTensor().mutable_data_ptr();
134 if (qnn_manager->RegisterMem(mutable_data_ptr, output_tensor) !=
135 Error::Ok) {
136 output_tensor->FillDataBuffer(mutable_data_ptr, false /* copy_data */);
137 }
138 output_index++;
139 }
140 output_tensor_structs.push_back(output_tensor->CloneTensorStruct());
141 }
142
143 ET_CHECK_OR_RETURN_ERROR(
144 qnn_manager->Execute(
145 method_name,
146 input_tensor_structs,
147 output_tensor_structs,
148 context.event_tracer()) == Error::Ok,
149 Internal,
150 "Fail to execute graph");
151 ET_CHECK_OR_RETURN_ERROR(
152 qnn_manager->ProfileExecuteData(method_name, context.event_tracer()) ==
153 Error::Ok,
154 Internal,
155 "Fail to profile graph");
156
157 return Error::Ok;
158 }
159
destroy(DelegateHandle * handle) const160 void QnnExecuTorchBackend::destroy(DelegateHandle* handle) const {
161 if (handle != nullptr && delegate_map_rev_.count(handle)) {
162 QnnManager* qnn_manager = static_cast<QnnManager*>(handle);
163 qnn_manager->Destroy();
164 erase_cached_delegate(handle);
165 }
166 }
167
is_available() const168 bool QnnExecuTorchBackend::is_available() const {
169 return true;
170 }
171
add_cached_delegate(const std::string & signature,executorch::runtime::DelegateHandle * handle) const172 void QnnExecuTorchBackend::add_cached_delegate(
173 const std::string& signature,
174 executorch::runtime::DelegateHandle* handle) const {
175 std::lock_guard<std::mutex> guard(mutex_);
176 delegate_map_[signature] = handle;
177 delegate_map_rev_[handle] = signature;
178 }
179
erase_cached_delegate(executorch::runtime::DelegateHandle * handle) const180 void QnnExecuTorchBackend::erase_cached_delegate(
181 executorch::runtime::DelegateHandle* handle) const {
182 std::lock_guard<std::mutex> guard(mutex_);
183 auto iter = delegate_map_rev_.find(handle);
184 if (iter == delegate_map_rev_.end()) {
185 return;
186 }
187 delegate_map_.erase(iter->second);
188 delegate_map_rev_.erase(handle);
189 }
190
191 namespace {
192 auto cls = QnnExecuTorchBackend();
193 executorch::runtime::Backend backend{"QnnBackend", &cls};
194 static auto success_with_compiler = register_backend(backend);
195 } // namespace
196 } // namespace qnn
197 } // namespace backends
198 } // namespace executorch
199