/*
 * Copyright (c) Qualcomm Innovation Center, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <executorch/backends/qualcomm/aot/ir/qcir_utils.h>
#include <executorch/backends/qualcomm/qc_binary_info_generated.h>
#include <executorch/backends/qualcomm/runtime/QnnManager.h>
#include <executorch/backends/qualcomm/runtime/SharedBuffer.h>
#include <executorch/backends/qualcomm/runtime/Utils.h>
#include <executorch/backends/qualcomm/runtime/backends/QnnBackendCommon.h>
#include <executorch/backends/qualcomm/runtime/backends/QnnImplementation.h>
#include <executorch/extension/tensor/tensor.h>
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <string>

namespace executorch {
namespace backends {
namespace qnn {

using executorch::runtime::Error;

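// Comparator used to keep exported graph inputs in the same order as their
// AOT external ids. It parses the number that follows the first '_' in the
// tensor name; the exact naming scheme is assumed to be the one produced by
// the AOT flow (see the note inside the function body).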
bool CompareExportedInput(
    const std::shared_ptr<TensorWrapper>& a,
    const std::shared_ptr<TensorWrapper>& b) {
  // Using the order of the nodes as external_id in AOT
  // to extract the right arg from *args at runtime
  int numA = std::stoi(a->GetName().substr(a->GetName().find('_') + 1));
  int numB = std::stoi(b->GetName().substr(b->GetName().find('_') + 1));
  return numA < numB;
}

QnnManager::~QnnManager() {
  backend_params_ptr_.reset(new BackendConfigParameters());
  logger_.reset();
  qnn_loaded_backend_.TerminateAllBackends();
}

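// Constructs a QnnManager from the serialized QnnExecuTorchOptions and the
// ahead-of-time context binary. Resolves which backend library (HTP/DSP/GPU)
// to load when no explicit library_path is given, and dumps the effective
// configuration at info log level.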
QnnManager::QnnManager(
    const QnnExecuTorchOptions* options,
    const QnnExecuTorchContextBinary& qnn_executorch_context_binary)
    : qnn_context_blob_(qnn_executorch_context_binary),
      qnn_loaded_backend_(""),
      // The options' lifetime is tied to the compiler specs, which are owned
      // by the ExecuTorch runtime framework. Be careful: dereferencing the
      // pointer after the specs are released would be a use-after-free.
      options_(options) {
  QnnExecuTorchBackendType backend_type =
      options->backend_options()->backend_type();
  std::string library_path = options->library_path()->str();

  if (options->log_level() >= QnnExecuTorchLogLevel::kLogLevelInfo) {
    QNN_EXECUTORCH_LOG_INFO(
        "soc_model in soc_info: %s",
        EnumNameQcomChipset(options_->soc_info()->soc_model()));
    QNN_EXECUTORCH_LOG_INFO(
        "backend_type: %s", EnumNameQnnExecuTorchBackendType(backend_type));
    QNN_EXECUTORCH_LOG_INFO("graph_name: %s", options_->graph_name()->c_str());
    QNN_EXECUTORCH_LOG_INFO("library_path: %s", library_path.c_str());
    QNN_EXECUTORCH_LOG_INFO(
        "dump intermediate outputs: %s", IsTensorDump() ? "true" : "false");
    QNN_EXECUTORCH_LOG_INFO(
        "log_level: %s", EnumNameQnnExecuTorchLogLevel(options_->log_level()));
    QNN_EXECUTORCH_LOG_INFO(
        "profile_level: %s",
        EnumNameQnnExecuTorchProfileLevel(options_->profile_level()));
    QNN_EXECUTORCH_LOG_INFO(
        "the size of qnn context binary: %llu",
        static_cast<unsigned long long>(qnn_executorch_context_binary.nbytes));
    QNN_EXECUTORCH_LOG_INFO(
        "Is on-device graph construction: %d", options->online_prepare());
    QNN_EXECUTORCH_LOG_INFO(
        "Enable shared buffer: %d", options->shared_buffer());
  }

  if (library_path.empty()) {
    switch (backend_type) {
      case QnnExecuTorchBackendType::kHtpBackend:
        library_path = htp_library_name_;
        break;
      case QnnExecuTorchBackendType::kDspBackend:
        library_path = dsp_library_name_;
        break;
      case QnnExecuTorchBackendType::kGpuBackend:
        library_path = gpu_library_name_;
        break;
      default:
        QNN_EXECUTORCH_LOG_ERROR("Unknown backend type: %d", backend_type);
        break;
    }
  }
  qnn_loaded_backend_ = QnnImplementation(library_path);
  backend_params_ptr_ = std::make_unique<BackendConfigParameters>();
}

Error QnnManager::LoadQnnLibrary() {
  Error ret = qnn_loaded_backend_.Load(nullptr);
  return ret;
}

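// Pre-registers every custom-memory tensor recorded through
// QnnExecuTorchAddCustomMemTensorInfo with the QNN memory manager. For each
// entry it resolves the tensor's offset inside the (unaligned) allocation and
// the file descriptor of the underlying buffer, so a pre-registered handle
// can be looked up later instead of registering again at execution time.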
Error QnnManager::PreRegisterMem() {
  SharedBuffer& shared_buffer_manager = SharedBuffer::GetSharedBufferManager();
  for (const auto& info : shared_buffer_manager.GetCustomMemTensorInfoSet()) {
    void* unaligned_custom_mem_base =
        shared_buffer_manager.GetUnAlignedAddr(info.custom_mem);

    size_t tensor_offset = (static_cast<char*>(info.custom_mem) -
                            static_cast<char*>(unaligned_custom_mem_base)) +
        info.pos;
    size_t total_custom_mem_size =
        shared_buffer_manager.GetAllocatedSize(info.custom_mem);

    int32_t mem_fd = shared_buffer_manager.MemToFd(unaligned_custom_mem_base);
    if (mem_fd == -1) {
      QNN_EXECUTORCH_LOG_WARN(
          "PreRegisterMem failed to get file descriptor. "
          "custom_mem: %p, tensor_addr: %p, pos: %zu, tensor_bytes: %zu, "
          "shape: %p, rank: %zu, qnn_dtype: %X",
          info.custom_mem,
          info.tensor_addr,
          info.pos,
          info.tensor_bytes,
          info.shape,
          info.rank,
          info.dtype);
      return Error::Internal;
    }

    ET_CHECK_OR_RETURN_ERROR(
        backend_params_ptr_->qnn_mem_manager_ptr_->PreRegisterCustomMemHandle(
            mem_fd,
            unaligned_custom_mem_base,
            total_custom_mem_size,
            tensor_offset,
            info) == Error::Ok,
        Internal,
        "Failed to register to shared memory.");
  }
  return Error::Ok;
}

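// Registers the buffer backing `data_ptr` with QNN shared memory for the
// given tensor. Requires shared_buffer to be enabled in the options and a
// backend that provides a memory manager; dispatches to RegisterCustomMem
// when the pointer lives inside a user-provided custom buffer, otherwise to
// RegisterIonMem.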
Error QnnManager::RegisterMem(
    void* data_ptr,
    const std::shared_ptr<TensorWrapper>& tensor_wrapper) {
  SharedBuffer& shared_buffer_manager = SharedBuffer::GetSharedBufferManager();
  // Shared buffer is not enabled
  if (!options_->shared_buffer())
    return Error::Internal;

  if (backend_params_ptr_->qnn_mem_manager_ptr_ == nullptr) {
    QNN_EXECUTORCH_LOG_WARN(
        "Backend %s doesn't support shared buffer.",
        EnumNameQnnExecuTorchBackendType(
            options_->backend_options()->backend_type()));
    return Error::Internal;
  }

  void* custom_mem_base = shared_buffer_manager.GetCustomMemBase(data_ptr);
  if (custom_mem_base != nullptr) {
    return RegisterCustomMem(data_ptr, custom_mem_base, tensor_wrapper);
  }
  return RegisterIonMem(data_ptr, tensor_wrapper);
}

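// Registers a buffer that was allocated as a whole through the shared buffer
// manager (QnnExecuTorchAllocCustomMem) with the QNN memory manager. Returns
// Error::Internal when the pointer wasn't allocated that way, and
// short-circuits when the tensor is already registered.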
Error QnnManager::RegisterIonMem(
    void* data_ptr,
    const std::shared_ptr<TensorWrapper>& tensor_wrapper) {
  SharedBuffer& shared_buffer_manager = SharedBuffer::GetSharedBufferManager();
  if (!shared_buffer_manager.IsAllocated(data_ptr)) {
    // Two scenarios can lead here:
    // 1. the buffer is an input or output of a partitioned graph
    // 2. the user didn't allocate the shared buffer with the
    //    QnnExecuTorchAllocCustomMem API
    return Error::Internal;
  } else if (backend_params_ptr_->qnn_mem_manager_ptr_->IsRegistered(
                 tensor_wrapper->GetMemHandle(), data_ptr)) {
    if (options_->log_level() >= QnnExecuTorchLogLevel::kLogLevelInfo)
      QNN_EXECUTORCH_LOG_INFO(
          "Tensor name %s has already been registered to shared memory.",
          tensor_wrapper->GetName().c_str());
    return Error::Ok;
  }

  int32_t mem_fd = shared_buffer_manager.MemToFd(data_ptr);
  if (mem_fd == -1) {
    QNN_EXECUTORCH_LOG_WARN(
        "Tensor name %s failed to get file descriptor.",
        tensor_wrapper->GetName().c_str());
    return Error::Internal;
  }
  ET_CHECK_OR_RETURN_ERROR(
      backend_params_ptr_->qnn_mem_manager_ptr_->RegisterIonMem(
          tensor_wrapper, mem_fd, data_ptr) == Error::Ok,
      Internal,
      "Failed to register to shared memory.");

  return Error::Ok;
}

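// Registers a tensor that lives at an arbitrary offset inside a
// user-provided custom memory buffer. Reuses a handle pre-registered by
// PreRegisterMem when one exists; otherwise registers the whole (unaligned)
// allocation with the QNN memory manager together with the tensor's offset
// into it.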
Error QnnManager::RegisterCustomMem(
    void* data_ptr,
    void* custom_mem_base,
    const std::shared_ptr<TensorWrapper>& tensor_wrapper) {
  if (backend_params_ptr_->qnn_mem_manager_ptr_->IsRegistered(
          tensor_wrapper->GetMemHandle(), data_ptr)) {
    if (options_->log_level() >= QnnExecuTorchLogLevel::kLogLevelInfo)
      QNN_EXECUTORCH_LOG_INFO(
          "Tensor name %s has already been registered to shared memory.",
          tensor_wrapper->GetName().c_str());
    return Error::Ok;
  }

  CustomMemTensorInfo info{
      custom_mem_base,
      data_ptr,
      static_cast<size_t>(
          static_cast<char*>(data_ptr) - static_cast<char*>(custom_mem_base)),
      tensor_wrapper->GetBytes(),
      tensor_wrapper->GetDims(),
      tensor_wrapper->GetRank(),
      qnn_dtype_to_scalar_type_[tensor_wrapper->GetDataType()]};

  Qnn_MemHandle_t pre_registered_handle =
      backend_params_ptr_->qnn_mem_manager_ptr_->GetPreRegisteredHandle(info);
  if (pre_registered_handle != nullptr) {
    if (options_->log_level() >= QnnExecuTorchLogLevel::kLogLevelInfo) {
      QNN_EXECUTORCH_LOG_INFO(
          "Tensor name %s found a pre-registered memHandle.",
          tensor_wrapper->GetName().c_str());
    }
    return backend_params_ptr_->qnn_mem_manager_ptr_->SetMemHandle(
        tensor_wrapper, data_ptr, pre_registered_handle);
  }

  SharedBuffer& shared_buffer_manager = SharedBuffer::GetSharedBufferManager();
  void* unaligned_custom_mem_base =
      shared_buffer_manager.GetUnAlignedAddr(custom_mem_base);

  size_t tensor_offset = static_cast<char*>(custom_mem_base) -
      static_cast<char*>(unaligned_custom_mem_base) + info.pos;
  size_t total_custom_mem_size =
      shared_buffer_manager.GetAllocatedSize(custom_mem_base);

  int32_t mem_fd = shared_buffer_manager.MemToFd(unaligned_custom_mem_base);
  if (mem_fd == -1) {
    QNN_EXECUTORCH_LOG_WARN(
        "Tensor name %s failed to get file descriptor.",
        tensor_wrapper->GetName().c_str());
    return Error::Internal;
  }

  ET_CHECK_OR_RETURN_ERROR(
      backend_params_ptr_->qnn_mem_manager_ptr_->RegisterCustomMem(
          tensor_wrapper,
          mem_fd,
          data_ptr,
          unaligned_custom_mem_base,
          total_custom_mem_size,
          tensor_offset) == Error::Ok,
      Internal,
      "Failed to register to shared memory.");

  return Error::Ok;
}

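// Loads the QNN backend library and, on first use, builds the full backend
// stack (cache, backend, device, context, graphs) through QnnBackendFactory.
// On aarch64 targets it additionally pre-registers any custom memory handles
// recorded before initialization.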
Error QnnManager::Init() {
  ET_CHECK_OR_RETURN_ERROR(
      LoadQnnLibrary() == Error::Ok, Internal, "Failed to load Qnn library");
  logger_ = std::make_unique<QnnLogger>(
      qnn_loaded_backend_, LoggingCallback, options_->log_level());
  if (backend_params_ptr_->backend_init_state_ ==
      BackendInitializeState::UNINITIALIZED) {
    QNN_EXECUTORCH_LOG_INFO(
        "Initialize Qnn backend "
        "parameters for Qnn executorch backend type %d",
        options_->backend_options()->backend_type());
    backend_params_ptr_ = QnnBackendFactory().Create(
        qnn_loaded_backend_, logger_.get(), qnn_context_blob_, options_);
    ET_CHECK_OR_RETURN_ERROR(
        backend_params_ptr_ != nullptr,
        Internal,
        "Failed to load Qnn backend.");
    ET_CHECK_OR_RETURN_ERROR(
        backend_params_ptr_->qnn_backend_cache_ptr_->Configure() == Error::Ok,
        Internal,
        "Failed to configure Qnn backend cache");
    ET_CHECK_OR_RETURN_ERROR(
        backend_params_ptr_->qnn_backend_ptr_->Configure() == Error::Ok,
        Internal,
        "Failed to configure Qnn backend");
    ET_CHECK_OR_RETURN_ERROR(
        backend_params_ptr_->qnn_device_ptr_->Configure() == Error::Ok,
        Internal,
        "Failed to configure Qnn device");
    ET_CHECK_OR_RETURN_ERROR(
        backend_params_ptr_->qnn_context_ptr_->Configure() == Error::Ok,
        Internal,
        "Failed to configure Qnn context");
    for (const std::string& graph_name :
         backend_params_ptr_->qnn_context_ptr_->GetGraphNames()) {
      ET_CHECK_OR_RETURN_ERROR(
          backend_params_ptr_->qnn_graph_ptr_->Configure(graph_name) ==
              Error::Ok,
          Internal,
          "Failed to configure Qnn graph");
    }
    backend_params_ptr_->backend_init_state_ =
        BackendInitializeState::INITIALIZED;
  }

#if defined(__aarch64__)
  ET_CHECK_OR_RETURN_ERROR(
      PreRegisterMem() == Error::Ok,
      Internal,
      "Failed to pre-register custom memory handle");
#endif
  return Error::Ok;
}

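// Creates TensorWrapper objects for the graph's input and output tensors as
// reported by the QNN context. Inputs are re-sorted by their AOT external id
// unless the model comes straight from a prebuilt context binary; in the
// prebuilt case outputs may be renamed with an "output_" prefix so the
// shared-buffer path can identify them, and outputs get their own data
// buffers when intermediate tensor dump is enabled.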
Error QnnManager::AllocateTensor(const std::string& graph_name) {
  std::vector<Qnn_Tensor_t> input_tensors =
      backend_params_ptr_->qnn_context_ptr_->GetGraphInputs(graph_name);
  std::vector<Qnn_Tensor_t> output_tensors =
      backend_params_ptr_->qnn_context_ptr_->GetGraphOutputs(graph_name);

  for (auto& tensor : input_tensors) {
    std::shared_ptr<TensorWrapper> tensor_wrapper = CreateTensorWrapper(tensor);
    tensor_wrapper->UpdateQnnTensorMeta(tensor);
    input_tensors_[graph_name].emplace_back(std::move(tensor_wrapper));
  }
  if (!options_->is_from_context_binary()) {
    std::sort(
        input_tensors_[graph_name].begin(),
        input_tensors_[graph_name].end(),
        CompareExportedInput);
  }
  for (size_t i = 0; i < output_tensors.size(); ++i) {
    std::shared_ptr<TensorWrapper> tensor_wrapper =
        CreateTensorWrapper(output_tensors[i]);
    tensor_wrapper->UpdateQnnTensorMeta(output_tensors[i]);
    const std::string& tensor_name = tensor_wrapper->GetName();
    // The "output_" prefix is required by the shared buffer identification
    // mechanism; it might be missing if the context binary came from
    // qnn_converter.
    if (options_->is_from_context_binary() &&
        tensor_name.find("output_") == std::string::npos) {
      tensor_wrapper->SetName("output_" + tensor_name);
    }
    if (IsTensorDump()) {
      tensor_wrapper->AllocateDataBuffer();
    }
    output_tensors_[graph_name].emplace_back(std::move(tensor_wrapper));
  }
  return Error::Ok;
}

Error QnnManager::AllocateTensor(
    const std::string& graph_name,
    std::vector<std::shared_ptr<TensorWrapper>>& inputs,
    std::vector<std::shared_ptr<TensorWrapper>>& outputs) {
  input_tensors_[graph_name] = std::move(inputs);
  // TODO: support per-tensor dump in online prepare mode;
  // should be achievable with some pre-processing.
  if (!options_->is_from_context_binary()) {
    std::sort(
        input_tensors_[graph_name].begin(),
        input_tensors_[graph_name].end(),
        CompareExportedInput);
  }
  output_tensors_[graph_name] = std::move(outputs);
  return Error::Ok;
}

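// Runs the finalized QNN graph identified by graph_name on the given
// input/output tensor structs. When intermediate tensor dump is enabled,
// every output tensor is wrapped in an ExecuTorch tensor and handed to the
// event tracer as delegate debug output.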
Error QnnManager::Execute(
    const std::string& graph_name,
    const std::vector<Qnn_Tensor_t>& input_tensor_structs,
    std::vector<Qnn_Tensor_t>& output_tensor_structs,
    executorch::runtime::EventTracer* event_tracer) {
  Qnn_ErrorHandle_t error = QNN_SUCCESS;

  error = backend_params_ptr_->qnn_graph_ptr_->GraphExecute(
      graph_name, input_tensor_structs, output_tensor_structs);

  if (error != QNN_SUCCESS) {
    QNN_EXECUTORCH_LOG_ERROR(
        "qnn_graph_execute failed. Error %d", QNN_GET_ERROR_CODE(error));
    return Error::Internal;
  }
  if (IsTensorDump()) {
    // TODO: handle partitioned graphs; maybe the graph name could be used.
    for (std::size_t out_idx = 0; out_idx < output_tensor_structs.size();
         ++out_idx) {
      const Qnn_Tensor_t& output_tensor = output_tensor_structs[out_idx];
      std::vector<executorch::aten::SizesType> sizes(
          QNN_VER_PTR(output_tensor)->dimensions,
          QNN_VER_PTR(output_tensor)->dimensions +
              QNN_VER_PTR(output_tensor)->rank);

      auto dump_tensor = executorch::extension::from_blob(
          QNN_VER_PTR(output_tensor)->clientBuf.data,
          sizes,
          qnn_dtype_to_scalar_type_[QNN_VER_PTR(output_tensor)->dataType]);

      executorch::runtime::event_tracer_log_output_delegate<
          executorch::aten::Tensor>(
          event_tracer,
          QNN_VER_PTR(output_tensor)->name,
          /*delegate_debug_id=*/
          static_cast<executorch::runtime::DebugHandle>(-1),
          *dump_tensor);
    }
  }

  return Error::Ok;
}

Error QnnManager::ProfileExecuteData(
    const std::string& graph_name,
    executorch::runtime::EventTracer* event_tracer) {
  Qnn_ErrorHandle_t error = QNN_SUCCESS;
  if (options_->profile_level() != QnnExecuTorchProfileLevel::kProfileOff) {
    error = backend_params_ptr_->qnn_graph_ptr_->ProfileExecuteData(
        graph_name, event_tracer);
    if (error != QNN_SUCCESS) {
      QNN_EXECUTORCH_LOG_ERROR(
          "Failed to profile. Error %d", QNN_GET_ERROR_CODE(error));
      return Error::Internal;
    }
  }
  return Error::Ok;
}

void QnnManager::Destroy() {
  QNN_EXECUTORCH_LOG_INFO("Destroy Qnn backend parameters");
  backend_params_ptr_.reset(new BackendConfigParameters());
  logger_.reset();

  qnn_loaded_backend_.TerminateAllBackends();
}

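// Validation entry point (typically used while partitioning): populates every
// op parameter and asks the QNN backend to validate each op config. Returns
// false as soon as any op is rejected.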
bool QnnManager::IsNodeSupportedByBackend(
    std::vector<std::shared_ptr<OpWrapper>>& op_wrappers) {
  Qnn_ErrorHandle_t error = QNN_SUCCESS;

  for (std::shared_ptr<OpWrapper>& op_wrapper : op_wrappers) {
    for (const auto& param : op_wrapper->GetParams()) {
      // unused?
      // auto* p_tensor_param = dynamic_cast<TensorParamWrapper*>(param.get());
      Error status = param->PopulateQnnParam();
      if (status != Error::Ok) {
        QNN_EXECUTORCH_LOG_WARN(
            "Qnn Backend op validation failed "
            "with PopulateQnnParam: %d",
            static_cast<int>(status));
        return false;
      }
    }

    error = backend_params_ptr_->qnn_backend_ptr_->BackendValidateOpConfig(
        op_wrapper->GetOpConfig());
    if (error != QNN_SUCCESS) {
      QNN_EXECUTORCH_LOG_WARN(
          "Qnn Backend op validation failed with error: %d",
          QNN_GET_ERROR_CODE(error));

      return false;
    }
  }
  return true;
}

Error QnnManager::GetContextBinary(
    QnnExecuTorchContextBinary& qnn_executorch_context_binary) {
  ET_CHECK_OR_RETURN_ERROR(
      backend_params_ptr_->qnn_context_ptr_->GetContextBinary(
          qnn_executorch_context_binary) == Error::Ok,
      Internal,
      "Failed to get context binary.");

  return Error::Ok;
}

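// Online-prepare path: verifies that the context blob wraps a BinaryInfo
// flatbuffer whose payload is a qcir::Context, then rebuilds every graph from
// it. qcir tensors become TensorWrappers, qcir nodes become OpWrappers (with
// tensor or scalar params), and each graph is compiled and gets its I/O
// tensors allocated.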
Error QnnManager::CompileQcir() {
  flatbuffers::Verifier verifier_binary_info(
      static_cast<const uint8_t* const>(qnn_context_blob_.buffer),
      qnn_context_blob_.nbytes);
  if (!qnn_delegate::VerifyBinaryInfoBuffer(verifier_binary_info)) {
    QNN_EXECUTORCH_LOG_ERROR("Failed to verify binary info");
    return Error::Internal;
  }

  auto binary_info = qnn_delegate::GetBinaryInfo(qnn_context_blob_.buffer);
  flatbuffers::Verifier verifier_qcir(
      binary_info->data()->data(), binary_info->data()->size());
  if (!qcir::VerifyContextBuffer(verifier_qcir)) {
    QNN_EXECUTORCH_LOG_ERROR("Failed to verify qcir format");
    return Error::Internal;
  }

  auto context = qcir::GetContext(binary_info->data()->data());
  for (const auto& graph : *context->graphs()) {
    // qcir tensors to TensorWrapper
    std::vector<std::shared_ptr<TensorWrapper>> graph_inputs, graph_outputs,
        tensors;
    for (const auto& tensor : *graph->tensors()) {
      tensors.emplace_back(CreateTensorWrapper(ToTensor(tensor)));
      if (tensor->type() == qcir::TensorType::WRITE) {
        graph_inputs.push_back(tensors.back());
      } else if (tensor->type() == qcir::TensorType::READ) {
        graph_outputs.push_back(tensors.back());
      }
    }
    std::vector<std::shared_ptr<OpWrapper>> op_wrappers;
    // qcir graph node to OpWrapper
    for (const auto& node : *graph->nodes()) {
      std::shared_ptr<OpWrapper> op = std::make_shared<OpWrapper>(
          node->name()->str(),
          node->package_name()->str(),
          node->type_name()->str());

      // qcir input tensors to OpWrapper input tensors
      std::vector<std::shared_ptr<TensorWrapper>> inputs;
      for (uint32_t index : *node->inputs()) {
        inputs.push_back(tensors[index]);
      }
      op->AddInputTensors(inputs);

      // qcir output tensors to OpWrapper output tensors
      std::vector<std::shared_ptr<TensorWrapper>> outputs;
      for (uint32_t index : *node->outputs()) {
        outputs.push_back(tensors[index]);
      }
      op->AddOutputTensors(outputs);

      // qcir operator param to OpWrapper param
      for (uint32_t index : *node->params()) {
        const auto& tensor = graph->tensors()->Get(index);
        std::string name = tensor->name()->str();
        Qnn_DataType_t dtype = ToDataType(tensor->dtype());
        if (tensor->shape()->size() != 0) {
          // add tensor param
          op->AddTensorParam(
              name,
              dtype,
              tensor->shape()->size(),
              tensor->shape()->data(),
              tensor->data()->data());
        } else {
          // add scalar param
          switch (dtype) {
            case Qnn_DataType_t::QNN_DATATYPE_INT_32:
              op->AddScalarParam(
                  name,
                  dtype,
                  *reinterpret_cast<const int32_t*>(tensor->data()->Data()));
              break;
            case Qnn_DataType_t::QNN_DATATYPE_INT_16:
              op->AddScalarParam(
                  name,
                  dtype,
                  *reinterpret_cast<const int16_t*>(tensor->data()->Data()));
              break;
            case Qnn_DataType_t::QNN_DATATYPE_INT_8:
              op->AddScalarParam(
                  name, dtype, static_cast<int8_t>(*tensor->data()->Data()));
              break;
            case Qnn_DataType_t::QNN_DATATYPE_UINT_32:
              op->AddScalarParam(
                  name,
                  dtype,
                  *reinterpret_cast<const uint32_t*>(tensor->data()->Data()));
              break;
            case Qnn_DataType_t::QNN_DATATYPE_UINT_16:
              op->AddScalarParam(
                  name,
                  dtype,
                  *reinterpret_cast<const uint16_t*>(tensor->data()->Data()));
              break;
            case Qnn_DataType_t::QNN_DATATYPE_UINT_8:
              op->AddScalarParam(name, dtype, *tensor->data()->Data());
              break;
            case Qnn_DataType_t::QNN_DATATYPE_FLOAT_32:
            case Qnn_DataType_t::QNN_DATATYPE_FLOAT_16:
              op->AddScalarParam(
                  name,
                  dtype,
                  *reinterpret_cast<const float*>(tensor->data()->Data()));
              break;
            case Qnn_DataType_t::QNN_DATATYPE_BOOL_8:
              op->AddScalarParam(name, dtype, *tensor->data()->Data());
              break;
            default:
              QNN_EXECUTORCH_LOG_ERROR(
                  "Invalid scalar type: %s", tensor->name()->c_str());
              break;
          }
        }
      }
      op_wrappers.push_back(std::move(op));
    }

    ET_CHECK_OR_RETURN_ERROR(
        Compile(graph->name()->str(), op_wrappers) == Error::Ok,
        Internal,
        "Failed to compile graph from qcir with graph_name: %s",
        graph->name()->str().c_str());

    ET_CHECK_OR_RETURN_ERROR(
        AllocateTensor(graph->name()->str(), graph_inputs, graph_outputs) ==
            Error::Ok,
        Internal,
        "Failed to allocate tensor for qcir with graph_name: %s",
        graph->name()->str().c_str());
  }

  return Error::Ok;
}

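// Builds and finalizes one QNN graph: every input/output tensor of each op is
// ensured to exist in the graph, tensor params are added as graph tensors,
// all params are populated, and each op config is added as a node before the
// graph is finalized.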
Error QnnManager::Compile(
    const std::string& graph_name,
    std::vector<std::shared_ptr<OpWrapper>>& op_wrappers) {
  Qnn_ErrorHandle_t error = QNN_SUCCESS;

  for (std::shared_ptr<OpWrapper>& op_wrapper : op_wrappers) {
    for (const auto& tensor_wrapper : op_wrapper->GetInputTensors()) {
      ET_CHECK_OR_RETURN_ERROR(
          backend_params_ptr_->qnn_graph_ptr_->EnsureTensorInQnnGraph(
              graph_name, tensor_wrapper) == Error::Ok,
          Internal,
          "Tensor name %s isn't added to Qnn Graph",
          tensor_wrapper->GetName().c_str());
    }

    for (const auto& tensor_wrapper : op_wrapper->GetOutputTensors()) {
      ET_CHECK_OR_RETURN_ERROR(
          backend_params_ptr_->qnn_graph_ptr_->EnsureTensorInQnnGraph(
              graph_name, tensor_wrapper) == Error::Ok,
          Internal,
          "Tensor name %s isn't added to Qnn Graph",
          tensor_wrapper->GetName().c_str());
    }

    for (const auto& param : op_wrapper->GetParams()) {
      auto* p_tensor_param = dynamic_cast<TensorParamWrapper*>(param.get());
      if (p_tensor_param != nullptr) {
        ET_CHECK_OR_RETURN_ERROR(
            backend_params_ptr_->qnn_graph_ptr_->EnsureTensorInQnnGraph(
                graph_name, p_tensor_param->GetTensorWrapper()) == Error::Ok,
            Internal,
            "Param tensor name %s isn't added to Qnn Graph",
            p_tensor_param->GetName().c_str());
      }
      ET_CHECK_OR_RETURN_ERROR(
          param->PopulateQnnParam() == Error::Ok,
          Internal,
          "Failed to populate Qnn param");
    }

    error = backend_params_ptr_->qnn_graph_ptr_->GraphAddNode(
        graph_name, op_wrapper->GetOpConfig());
    if (error != QNN_SUCCESS) {
      QNN_EXECUTORCH_LOG_ERROR(
          "Failed to add node to Qnn Graph with error: %d",
          QNN_GET_ERROR_CODE(error));
      return Error::Internal;
    }
  }

  error = backend_params_ptr_->qnn_graph_ptr_->GraphFinalize(graph_name);
  if (error != QNN_SUCCESS) {
    QNN_EXECUTORCH_LOG_ERROR(
        "Failed to finalize Qnn Graph with error: %d",
        QNN_GET_ERROR_CODE(error));
    return Error::Internal;
  }

  return Error::Ok;
}

std::string QnnManager::GetBinarySignature() {
  flatbuffers::Verifier verifier(
      static_cast<const uint8_t* const>(qnn_context_blob_.buffer),
      qnn_context_blob_.nbytes);
  return VerifyBinaryInfoBuffer(verifier)
      ? GetBinaryInfo(qnn_context_blob_.buffer)->signature()->str()
      : "";
}

} // namespace qnn
} // namespace backends
} // namespace executorch

void* QnnExecuTorchAllocCustomMem(size_t bytes, size_t alignment) {
  void* buffer_ptr =
      executorch::backends::qnn::SharedBuffer::GetSharedBufferManager()
          .AllocMem(bytes, alignment);
  return buffer_ptr;
}

void QnnExecuTorchFreeCustomMem(void* buffer_ptr) {
  executorch::backends::qnn::SharedBuffer::GetSharedBufferManager().FreeMem(
      buffer_ptr);
}

void QnnExecuTorchAddCustomMemTensorAddr(void* tensor_addr, void* custom_mem) {
  executorch::backends::qnn::SharedBuffer::GetSharedBufferManager()
      .AddCusomMemTensorAddr(tensor_addr, custom_mem);
}

void QnnExecuTorchAddCustomMemTensorInfo(const CustomMemTensorInfo& info) {
  executorch::backends::qnn::SharedBuffer::GetSharedBufferManager()
      .AddCusomMemTensorInfo(info);
}

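// Minimal usage sketch for the C-style shared-buffer API above, assuming a
// float tensor of kNumElems elements and 4 KiB alignment; kNumElems and
// tensor_data are hypothetical names used only for illustration.
//
//   constexpr size_t kNumElems = 1024;
//   // Allocate a shared buffer large enough to back the tensor.
//   void* custom_mem =
//       QnnExecuTorchAllocCustomMem(kNumElems * sizeof(float), 4096);
//   // Tell the runtime that the tensor's data lives inside this buffer, so
//   // QnnManager::RegisterMem can take the custom-memory path.
//   float* tensor_data = static_cast<float*>(custom_mem);
//   QnnExecuTorchAddCustomMemTensorAddr(tensor_data, custom_mem);
//   // ... run the delegate with tensor_data as an input/output buffer ...
//   QnnExecuTorchFreeCustomMem(custom_mem);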