/*
 * Copyright (c) Qualcomm Innovation Center, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <executorch/backends/qualcomm/aot/ir/qcir_utils.h>
#include <executorch/backends/qualcomm/qc_binary_info_generated.h>
#include <executorch/backends/qualcomm/runtime/QnnManager.h>
#include <executorch/backends/qualcomm/runtime/SharedBuffer.h>
#include <executorch/backends/qualcomm/runtime/Utils.h>
#include <executorch/backends/qualcomm/runtime/backends/QnnBackendCommon.h>
#include <executorch/backends/qualcomm/runtime/backends/QnnImplementation.h>
#include <executorch/extension/tensor/tensor.h>
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <string>

namespace executorch {
namespace backends {
namespace qnn {

using executorch::runtime::Error;

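// Sort predicate for exported graph inputs. Names are assumed to carry the
// ahead-of-time node order as a numeric token right after the first
// underscore; e.g. with hypothetical names "input_0_x" and "input_1_y",
// std::stoi parses 0 and 1, so "input_0_x" sorts first.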
bool CompareExportedInput(
    const std::shared_ptr<TensorWrapper>& a,
    const std::shared_ptr<TensorWrapper>& b) {
  // The node order is used as the external_id ahead-of-time so that the
  // right arg can be extracted from *args at runtime.
  int numA = std::stoi(a->GetName().substr(a->GetName().find('_') + 1));
  int numB = std::stoi(b->GetName().substr(b->GetName().find('_') + 1));
  return numA < numB;
}

QnnManager::~QnnManager() {
  backend_params_ptr_.reset(new BackendConfigParameters());
  logger_.reset();
  qnn_loaded_backend_.TerminateAllBackends();
}

QnnManager::QnnManager(
    const QnnExecuTorchOptions* options,
    const QnnExecuTorchContextBinary& qnn_executorch_context_binary)
    : qnn_context_blob_(qnn_executorch_context_binary),
      qnn_loaded_backend_(""),
      // The lifetime of `options` is decided by the compiler specs, which are
      // kept by the ExecuTorch runtime framework; watch out for potential
      // use-after-free segfaults.
      options_(options) {
  QnnExecuTorchBackendType backend_type =
      options->backend_options()->backend_type();
  std::string library_path = options->library_path()->str();

  if (options->log_level() >= QnnExecuTorchLogLevel::kLogLevelInfo) {
    QNN_EXECUTORCH_LOG_INFO(
        "soc_model in soc_info: %s",
        EnumNameQcomChipset(options_->soc_info()->soc_model()));
    QNN_EXECUTORCH_LOG_INFO(
        "backend_type: %s", EnumNameQnnExecuTorchBackendType(backend_type));
    QNN_EXECUTORCH_LOG_INFO("graph_name: %s", options_->graph_name()->c_str());
    QNN_EXECUTORCH_LOG_INFO("library_path: %s", library_path.c_str());
    QNN_EXECUTORCH_LOG_INFO("dump intermediate outputs: %d", IsTensorDump());
    QNN_EXECUTORCH_LOG_INFO(
        "log_level: %s", EnumNameQnnExecuTorchLogLevel(options_->log_level()));
    QNN_EXECUTORCH_LOG_INFO(
        "profile_level: %s",
        EnumNameQnnExecuTorchProfileLevel(options_->profile_level()));
    QNN_EXECUTORCH_LOG_INFO(
        "the size of qnn context binary: %d",
        qnn_executorch_context_binary.nbytes);
    QNN_EXECUTORCH_LOG_INFO(
        "Is on-device graph construction: %d", options->online_prepare());
    QNN_EXECUTORCH_LOG_INFO(
        "Enable shared buffer: %d", options->shared_buffer());
  }

  if (library_path.empty()) {
    switch (backend_type) {
      case QnnExecuTorchBackendType::kHtpBackend:
        library_path = htp_library_name_;
        break;
      case QnnExecuTorchBackendType::kDspBackend:
        library_path = dsp_library_name_;
        break;
      case QnnExecuTorchBackendType::kGpuBackend:
        library_path = gpu_library_name_;
        break;
      default:
        QNN_EXECUTORCH_LOG_ERROR("Unknown backend type: %d", backend_type);
        break;
    }
  }
  qnn_loaded_backend_ = QnnImplementation(library_path);
  backend_params_ptr_ = std::make_unique<BackendConfigParameters>();
}

Error QnnManager::LoadQnnLibrary() {
  Error ret = qnn_loaded_backend_.Load(nullptr);
  return ret;
}

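// Pre-registers every tensor recorded via QnnExecuTorchAddCustomMemTensorInfo.
// The aligned address handed out to users (info.custom_mem) may sit past the
// allocator's unaligned base, so the QNN-visible offset of a tensor is
// (aligned base - unaligned base) + info.pos. Worked example with assumed
// numbers: unaligned base 0x1000, aligned base 0x1040, info.pos 0x20
// => tensor_offset = 0x40 + 0x20 = 0x60.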
Error QnnManager::PreRegisterMem() {
  SharedBuffer& shared_buffer_manager = SharedBuffer::GetSharedBufferManager();
  for (const auto info : shared_buffer_manager.GetCustomMemTensorInfoSet()) {
    void* unaligned_custom_mem_base =
        shared_buffer_manager.GetUnAlignedAddr(info.custom_mem);

    size_t tensor_offset = (static_cast<char*>(info.custom_mem) -
                            static_cast<char*>(unaligned_custom_mem_base)) +
        info.pos;
    size_t total_custom_mem_size =
        shared_buffer_manager.GetAllocatedSize(info.custom_mem);

    int32_t mem_fd = shared_buffer_manager.MemToFd(unaligned_custom_mem_base);
    if (mem_fd == -1) {
      QNN_EXECUTORCH_LOG_WARN(
          "PreRegisterMem failed to get file descriptor. "
          "custom_mem: %p, tensor_addr: %p, pos: %zu, tensor_bytes: %zu, "
          "shape: %p, rank: %zu, qnn_dtype: %X",
          info.custom_mem,
          info.tensor_addr,
          info.pos,
          info.tensor_bytes,
          info.shape,
          info.rank,
          info.dtype);
      return Error::Internal;
    }

    ET_CHECK_OR_RETURN_ERROR(
        backend_params_ptr_->qnn_mem_manager_ptr_->PreRegisterCustomMemHandle(
            mem_fd,
            unaligned_custom_mem_base,
            total_custom_mem_size,
            tensor_offset,
            info) == Error::Ok,
        Internal,
        "Failed to register to shared memory.");
  }
  return Error::Ok;
}

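// Registration entry point: a pointer inside a user-provided custom block is
// routed to RegisterCustomMem(); anything else falls back to RegisterIonMem().
// Minimal caller sketch, assuming shared_buffer is enabled in the options
// (`manager` and `tensor_wrapper` are hypothetical, sizes arbitrary):
//
//   void* buf = QnnExecuTorchAllocCustomMem(4096, 64);
//   // ... fill buf with input data ...
//   manager.RegisterMem(buf, tensor_wrapper);  // takes the ION path here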
Error QnnManager::RegisterMem(
    void* data_ptr,
    const std::shared_ptr<TensorWrapper>& tensor_wrapper) {
  SharedBuffer& shared_buffer_manager = SharedBuffer::GetSharedBufferManager();
  // Shared buffer is not enabled.
  if (!options_->shared_buffer())
    return Error::Internal;

  if (backend_params_ptr_->qnn_mem_manager_ptr_ == nullptr) {
    QNN_EXECUTORCH_LOG_WARN(
        "Backend %s doesn't support shared buffer.",
        EnumNameQnnExecuTorchBackendType(
            options_->backend_options()->backend_type()));
    return Error::Internal;
  }

  void* custom_mem_base = shared_buffer_manager.GetCustomMemBase(data_ptr);
  if (custom_mem_base != nullptr) {
    return RegisterCustomMem(data_ptr, custom_mem_base, tensor_wrapper);
  }
  return RegisterIonMem(data_ptr, tensor_wrapper);
}

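// ION path: the pointer must come from the shared-buffer allocator; its file
// descriptor is registered with the QNN mem manager, and tensors that already
// carry a registered mem handle are skipped.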
Error QnnManager::RegisterIonMem(
    void* data_ptr,
    const std::shared_ptr<TensorWrapper>& tensor_wrapper) {
  SharedBuffer& shared_buffer_manager = SharedBuffer::GetSharedBufferManager();
  if (!shared_buffer_manager.IsAllocated(data_ptr)) {
    // Two scenarios can lead here:
    // 1. the tensor is an input/output between partitioned graphs, or
    // 2. the user didn't actually allocate the shared buffer with the
    //    QnnExecuTorchAllocCustomMem API.
    return Error::Internal;
  } else if (backend_params_ptr_->qnn_mem_manager_ptr_->IsRegistered(
                 tensor_wrapper->GetMemHandle(), data_ptr)) {
    if (options_->log_level() >= QnnExecuTorchLogLevel::kLogLevelInfo)
      QNN_EXECUTORCH_LOG_INFO(
          "Tensor name %s has already been registered to shared memory.",
          tensor_wrapper->GetName().c_str());
    return Error::Ok;
  }

  int32_t mem_fd = shared_buffer_manager.MemToFd(data_ptr);
  if (mem_fd == -1) {
    QNN_EXECUTORCH_LOG_WARN(
        "Tensor name %s failed to get file descriptor.",
        tensor_wrapper->GetName().c_str());
    return Error::Internal;
  }
  ET_CHECK_OR_RETURN_ERROR(
      backend_params_ptr_->qnn_mem_manager_ptr_->RegisterIonMem(
          tensor_wrapper, mem_fd, data_ptr) == Error::Ok,
      Internal,
      "Failed to register to shared memory.");

  return Error::Ok;
}

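// Custom-mem path: reuse a handle pre-registered by PreRegisterMem() when one
// matches this tensor's CustomMemTensorInfo; otherwise register the whole
// unaligned block and record the tensor's offset within it.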
Error QnnManager::RegisterCustomMem(
    void* data_ptr,
    void* custom_mem_base,
    const std::shared_ptr<TensorWrapper>& tensor_wrapper) {
  if (backend_params_ptr_->qnn_mem_manager_ptr_->IsRegistered(
          tensor_wrapper->GetMemHandle(), data_ptr)) {
    if (options_->log_level() >= QnnExecuTorchLogLevel::kLogLevelInfo)
      QNN_EXECUTORCH_LOG_INFO(
          "Tensor name %s has already been registered to shared memory.",
          tensor_wrapper->GetName().c_str());
    return Error::Ok;
  }

  CustomMemTensorInfo info{
      custom_mem_base,
      data_ptr,
      static_cast<size_t>(
          static_cast<char*>(data_ptr) - static_cast<char*>(custom_mem_base)),
      tensor_wrapper->GetBytes(),
      tensor_wrapper->GetDims(),
      tensor_wrapper->GetRank(),
      qnn_dtype_to_scalar_type_[tensor_wrapper->GetDataType()]};

  Qnn_MemHandle_t pre_registered_handle =
      backend_params_ptr_->qnn_mem_manager_ptr_->GetPreRegisteredHandle(info);
  if (pre_registered_handle != nullptr) {
    if (options_->log_level() >= QnnExecuTorchLogLevel::kLogLevelInfo) {
      QNN_EXECUTORCH_LOG_INFO(
          "Tensor name %s found a pre-registered memHandle.",
          tensor_wrapper->GetName().c_str());
    }
    return backend_params_ptr_->qnn_mem_manager_ptr_->SetMemHandle(
        tensor_wrapper, data_ptr, pre_registered_handle);
  }

  SharedBuffer& shared_buffer_manager = SharedBuffer::GetSharedBufferManager();
  void* unaligned_custom_mem_base =
      shared_buffer_manager.GetUnAlignedAddr(custom_mem_base);

  size_t tensor_offset = static_cast<char*>(custom_mem_base) -
      static_cast<char*>(unaligned_custom_mem_base) + info.pos;
  size_t total_custom_mem_size =
      shared_buffer_manager.GetAllocatedSize(custom_mem_base);

  int32_t mem_fd = shared_buffer_manager.MemToFd(unaligned_custom_mem_base);
  if (mem_fd == -1) {
    QNN_EXECUTORCH_LOG_WARN(
        "Tensor name %s failed to get file descriptor.",
        tensor_wrapper->GetName().c_str());
    return Error::Internal;
  }

  ET_CHECK_OR_RETURN_ERROR(
      backend_params_ptr_->qnn_mem_manager_ptr_->RegisterCustomMem(
          tensor_wrapper,
          mem_fd,
          data_ptr,
          unaligned_custom_mem_base,
          total_custom_mem_size,
          tensor_offset) == Error::Ok,
      Internal,
      "Failed to register to shared memory.");

  return Error::Ok;
}

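// Initialization sequence: load the backend library, install the logger, let
// QnnBackendFactory build the backend parameters, then configure cache,
// backend, device, context, and each named graph in that order. On aarch64,
// custom memory handles are pre-registered at the end.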
Error QnnManager::Init() {
  ET_CHECK_OR_RETURN_ERROR(
      LoadQnnLibrary() == Error::Ok, Internal, "Failed to load Qnn library");
  logger_ = std::make_unique<QnnLogger>(
      qnn_loaded_backend_, LoggingCallback, options_->log_level());
  if (backend_params_ptr_->backend_init_state_ ==
      BackendInitializeState::UNINITIALIZED) {
    QNN_EXECUTORCH_LOG_INFO(
        "Initialize Qnn backend "
        "parameters for Qnn executorch backend type %d",
        options_->backend_options()->backend_type());
    backend_params_ptr_ = QnnBackendFactory().Create(
        qnn_loaded_backend_, logger_.get(), qnn_context_blob_, options_);
    ET_CHECK_OR_RETURN_ERROR(
        backend_params_ptr_ != nullptr,
        Internal,
        "Failed to load Qnn backend.");
    ET_CHECK_OR_RETURN_ERROR(
        backend_params_ptr_->qnn_backend_cache_ptr_->Configure() == Error::Ok,
        Internal,
        "Failed to configure Qnn backend cache");
    ET_CHECK_OR_RETURN_ERROR(
        backend_params_ptr_->qnn_backend_ptr_->Configure() == Error::Ok,
        Internal,
        "Failed to configure Qnn backend");
    ET_CHECK_OR_RETURN_ERROR(
        backend_params_ptr_->qnn_device_ptr_->Configure() == Error::Ok,
        Internal,
        "Failed to configure Qnn device");
    ET_CHECK_OR_RETURN_ERROR(
        backend_params_ptr_->qnn_context_ptr_->Configure() == Error::Ok,
        Internal,
        "Failed to configure Qnn context");
    for (const std::string& graph_name :
         backend_params_ptr_->qnn_context_ptr_->GetGraphNames()) {
      ET_CHECK_OR_RETURN_ERROR(
          backend_params_ptr_->qnn_graph_ptr_->Configure(graph_name) ==
              Error::Ok,
          Internal,
          "Failed to configure Qnn graph");
    }
    backend_params_ptr_->backend_init_state_ =
        BackendInitializeState::INITIALIZED;
  }

#if defined(__aarch64__)
  ET_CHECK_OR_RETURN_ERROR(
      PreRegisterMem() == Error::Ok,
      Internal,
      "Failed to pre-register custom memory handle");
#endif
  return Error::Ok;
}

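// Wraps the graph's QNN input/output tensors in TensorWrappers. Inputs are
// sorted back into their exported order (see CompareExportedInput) unless the
// model came from a prebuilt context binary, in which case the original
// tensor order is kept as-is.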
Error QnnManager::AllocateTensor(const std::string& graph_name) {
  std::vector<Qnn_Tensor_t> input_tensors =
      backend_params_ptr_->qnn_context_ptr_->GetGraphInputs(graph_name);
  std::vector<Qnn_Tensor_t> output_tensors =
      backend_params_ptr_->qnn_context_ptr_->GetGraphOutputs(graph_name);

  for (auto& tensor : input_tensors) {
    std::shared_ptr<TensorWrapper> tensor_wrapper = CreateTensorWrapper(tensor);
    tensor_wrapper->UpdateQnnTensorMeta(tensor);
    input_tensors_[graph_name].emplace_back(std::move(tensor_wrapper));
  }
  if (!options_->is_from_context_binary()) {
    std::sort(
        input_tensors_[graph_name].begin(),
        input_tensors_[graph_name].end(),
        CompareExportedInput);
  }
  for (size_t i = 0; i < output_tensors.size(); ++i) {
    std::shared_ptr<TensorWrapper> tensor_wrapper =
        CreateTensorWrapper(output_tensors[i]);
    tensor_wrapper->UpdateQnnTensorMeta(output_tensors[i]);
    const std::string& tensor_name = tensor_wrapper->GetName();
    // The "output_" prefix is required for identification by the shared
    // buffer mechanism; it might be missing if the context binary came from
    // qnn_converter.
    if (options_->is_from_context_binary() &&
        tensor_name.find("output_") == std::string::npos) {
      tensor_wrapper->SetName("output_" + tensor_name);
    }
    if (IsTensorDump()) {
      tensor_wrapper->AllocateDataBuffer();
    }
    output_tensors_[graph_name].emplace_back(std::move(tensor_wrapper));
  }
  return Error::Ok;
}

Error QnnManager::AllocateTensor(
    const std::string& graph_name,
    std::vector<std::shared_ptr<TensorWrapper>>& inputs,
    std::vector<std::shared_ptr<TensorWrapper>>& outputs) {
  input_tensors_[graph_name] = std::move(inputs);
  // TODO: support per-tensor dump in online prepare mode;
  //       should be achievable with some pre-processing.
  if (!options_->is_from_context_binary()) {
    std::sort(
        input_tensors_[graph_name].begin(),
        input_tensors_[graph_name].end(),
        CompareExportedInput);
  }
  output_tensors_[graph_name] = std::move(outputs);
  return Error::Ok;
}

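// Runs one named graph. When intermediate tensor dump is enabled, each output
// is wrapped (not copied) as an ExecuTorch tensor over the QNN client buffer
// via from_blob and handed to the event tracer for logging.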
Error QnnManager::Execute(
    const std::string& graph_name,
    const std::vector<Qnn_Tensor_t>& input_tensor_structs,
    std::vector<Qnn_Tensor_t>& output_tensor_structs,
    executorch::runtime::EventTracer* event_tracer) {
  Qnn_ErrorHandle_t error = QNN_SUCCESS;

  error = backend_params_ptr_->qnn_graph_ptr_->GraphExecute(
      graph_name, input_tensor_structs, output_tensor_structs);

  if (error != QNN_SUCCESS) {
    QNN_EXECUTORCH_LOG_ERROR(
        "qnn_graph_execute failed. Error %d", QNN_GET_ERROR_CODE(error));
    return Error::Internal;
  }
  if (IsTensorDump()) {
    // TODO: Handle partitioned graphs here as well; the graph name could be
    // used to disambiguate.
    for (std::size_t out_idx = 0; out_idx < output_tensor_structs.size();
         ++out_idx) {
      const Qnn_Tensor_t& output_tensor = output_tensor_structs[out_idx];
      std::vector<executorch::aten::SizesType> sizes(
          QNN_VER_PTR(output_tensor)->dimensions,
          QNN_VER_PTR(output_tensor)->dimensions +
              QNN_VER_PTR(output_tensor)->rank);

      auto dump_tensor = executorch::extension::from_blob(
          QNN_VER_PTR(output_tensor)->clientBuf.data,
          sizes,
          qnn_dtype_to_scalar_type_[QNN_VER_PTR(output_tensor)->dataType]);

      executorch::runtime::event_tracer_log_output_delegate<
          executorch::aten::Tensor>(
          event_tracer,
          QNN_VER_PTR(output_tensor)->name,
          /*delegate_debug_id=*/
          static_cast<executorch::runtime::DebugHandle>(-1),
          *dump_tensor);
    }
  }

  return Error::Ok;
}

Error QnnManager::ProfileExecuteData(
    const std::string& graph_name,
    executorch::runtime::EventTracer* event_tracer) {
  Qnn_ErrorHandle_t error = QNN_SUCCESS;
  if (options_->profile_level() != QnnExecuTorchProfileLevel::kProfileOff) {
    error = backend_params_ptr_->qnn_graph_ptr_->ProfileExecuteData(
        graph_name, event_tracer);
    if (error != QNN_SUCCESS) {
      QNN_EXECUTORCH_LOG_ERROR(
          "Failed to profile. Error %d", QNN_GET_ERROR_CODE(error));
      return Error::Internal;
    }
  }
  return Error::Ok;
}

void QnnManager::Destroy() {
  QNN_EXECUTORCH_LOG_INFO("Destroy Qnn backend parameters");
  backend_params_ptr_.reset(new BackendConfigParameters());
  logger_.reset();

  qnn_loaded_backend_.TerminateAllBackends();
}

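// Offline validation pass: each op's params are populated and its op config is
// handed to BackendValidateOpConfig, so unsupported nodes can be rejected
// early instead of failing later at graph build.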
bool QnnManager::IsNodeSupportedByBackend(
    std::vector<std::shared_ptr<OpWrapper>>& op_wrappers) {
  Qnn_ErrorHandle_t error = QNN_SUCCESS;

  for (std::shared_ptr<OpWrapper>& op_wrapper : op_wrappers) {
    for (const auto& param : op_wrapper->GetParams()) {
      // unused?
      // auto* p_tensor_param = dynamic_cast<TensorParamWrapper*>(param.get());
      if (param->PopulateQnnParam() != Error::Ok) {
        QNN_EXECUTORCH_LOG_WARN(
            "Qnn Backend op validation failed: "
            "unable to populate Qnn param.");
        return false;
      }
    }

    error = backend_params_ptr_->qnn_backend_ptr_->BackendValidateOpConfig(
        op_wrapper->GetOpConfig());
    if (error != QNN_SUCCESS) {
      QNN_EXECUTORCH_LOG_WARN(
          "Qnn Backend op validation failed with error: %d",
          QNN_GET_ERROR_CODE(error));

      return false;
    }
  }
  return true;
}

Error QnnManager::GetContextBinary(
    QnnExecuTorchContextBinary& qnn_executorch_context_binary) {
  ET_CHECK_OR_RETURN_ERROR(
      backend_params_ptr_->qnn_context_ptr_->GetContextBinary(
          qnn_executorch_context_binary) == Error::Ok,
      Internal,
      "Failed to get context binary.");

  return Error::Ok;
}

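// On-device (online prepare) compilation from qcir: the context blob is a
// BinaryInfo flatbuffer whose payload is a qcir Context. Both layers are
// verified, then each graph is rebuilt op by op: qcir tensors become
// TensorWrappers (WRITE => graph input, READ => graph output), params with a
// non-empty shape become tensor params, and shapeless ones become scalar
// params keyed on their Qnn_DataType_t.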
Error QnnManager::CompileQcir() {
  flatbuffers::Verifier verifier_binary_info(
      static_cast<const uint8_t* const>(qnn_context_blob_.buffer),
      qnn_context_blob_.nbytes);
  if (!qnn_delegate::VerifyBinaryInfoBuffer(verifier_binary_info)) {
    QNN_EXECUTORCH_LOG_ERROR("Failed to verify binary info");
    return Error::Internal;
  }

  auto binary_info = qnn_delegate::GetBinaryInfo(qnn_context_blob_.buffer);
  flatbuffers::Verifier verifier_qcir(
      binary_info->data()->data(), binary_info->data()->size());
  if (!qcir::VerifyContextBuffer(verifier_qcir)) {
    QNN_EXECUTORCH_LOG_ERROR("Failed to verify qcir format");
    return Error::Internal;
  }

  auto context = qcir::GetContext(binary_info->data()->data());
  for (const auto& graph : *context->graphs()) {
    // qcir tensors to TensorWrapper
    std::vector<std::shared_ptr<TensorWrapper>> graph_inputs, graph_outputs,
        tensors;
    for (const auto& tensor : *graph->tensors()) {
      tensors.emplace_back(CreateTensorWrapper(ToTensor(tensor)));
      if (tensor->type() == qcir::TensorType::WRITE) {
        graph_inputs.push_back(tensors.back());
      } else if (tensor->type() == qcir::TensorType::READ) {
        graph_outputs.push_back(tensors.back());
      }
    }
    std::vector<std::shared_ptr<OpWrapper>> op_wrappers;
    // qcir graph node to OpWrapper
    for (const auto& node : *graph->nodes()) {
      std::shared_ptr<OpWrapper> op = std::make_shared<OpWrapper>(
          node->name()->str(),
          node->package_name()->str(),
          node->type_name()->str());

      // qcir input tensors to OpWrapper input tensors
      std::vector<std::shared_ptr<TensorWrapper>> inputs;
      for (uint32_t index : *node->inputs()) {
        inputs.push_back(tensors[index]);
      }
      op->AddInputTensors(inputs);

      // qcir output tensors to OpWrapper output tensors
      std::vector<std::shared_ptr<TensorWrapper>> outputs;
      for (uint32_t index : *node->outputs()) {
        outputs.push_back(tensors[index]);
      }
      op->AddOutputTensors(outputs);

      // qcir operator param to OpWrapper param
      for (uint32_t index : *node->params()) {
        const auto& tensor = graph->tensors()->Get(index);
        std::string name = tensor->name()->str();
        Qnn_DataType_t dtype = ToDataType(tensor->dtype());
        if (tensor->shape()->size() != 0) {
          // add tensor param
          op->AddTensorParam(
              name,
              dtype,
              tensor->shape()->size(),
              tensor->shape()->data(),
              tensor->data()->data());
        } else {
          // add scalar param
          switch (dtype) {
            case Qnn_DataType_t::QNN_DATATYPE_INT_32:
              op->AddScalarParam(
                  name,
                  dtype,
                  *reinterpret_cast<const int32_t*>(tensor->data()->Data()));
              break;
            case Qnn_DataType_t::QNN_DATATYPE_INT_16:
              op->AddScalarParam(
                  name,
                  dtype,
                  *reinterpret_cast<const int16_t*>(tensor->data()->Data()));
              break;
            case Qnn_DataType_t::QNN_DATATYPE_INT_8:
              op->AddScalarParam(
                  name, dtype, static_cast<int8_t>(*tensor->data()->Data()));
              break;
            case Qnn_DataType_t::QNN_DATATYPE_UINT_32:
              op->AddScalarParam(
                  name,
                  dtype,
                  *reinterpret_cast<const uint32_t*>(tensor->data()->Data()));
              break;
            case Qnn_DataType_t::QNN_DATATYPE_UINT_16:
              op->AddScalarParam(
                  name,
                  dtype,
                  *reinterpret_cast<const uint16_t*>(tensor->data()->Data()));
              break;
            case Qnn_DataType_t::QNN_DATATYPE_UINT_8:
              op->AddScalarParam(name, dtype, *tensor->data()->Data());
              break;
            case Qnn_DataType_t::QNN_DATATYPE_FLOAT_32:
            case Qnn_DataType_t::QNN_DATATYPE_FLOAT_16:
              op->AddScalarParam(
                  name,
                  dtype,
                  *reinterpret_cast<const float*>(tensor->data()->Data()));
              break;
            case Qnn_DataType_t::QNN_DATATYPE_BOOL_8:
              op->AddScalarParam(name, dtype, *tensor->data()->Data());
              break;
            default:
              QNN_EXECUTORCH_LOG_ERROR(
                  "Invalid scalar type: %s", tensor->name()->c_str());
              break;
          }
        }
      }
      op_wrappers.push_back(std::move(op));
    }

    ET_CHECK_OR_RETURN_ERROR(
        Compile(graph->name()->str(), op_wrappers) == Error::Ok,
        Internal,
        "Failed to compile graph from qcir with graph_name: %s",
        graph->name()->str().c_str());

    ET_CHECK_OR_RETURN_ERROR(
        AllocateTensor(graph->name()->str(), graph_inputs, graph_outputs) ==
            Error::Ok,
        Internal,
        "Failed to allocate tensor for qcir with graph_name: %s",
        graph->name()->str().c_str());
  }

  return Error::Ok;
}

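// Builds one QNN graph: every op's input, output, and tensor-param tensors
// must be present in the graph before the node itself is added; the graph is
// finalized once all nodes are in.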
Error QnnManager::Compile(
    const std::string& graph_name,
    std::vector<std::shared_ptr<OpWrapper>>& op_wrappers) {
  Qnn_ErrorHandle_t error = QNN_SUCCESS;

  for (std::shared_ptr<OpWrapper>& op_wrapper : op_wrappers) {
    for (const auto& tensor_wrapper : op_wrapper->GetInputTensors()) {
      ET_CHECK_OR_RETURN_ERROR(
          backend_params_ptr_->qnn_graph_ptr_->EnsureTensorInQnnGraph(
              graph_name, tensor_wrapper) == Error::Ok,
          Internal,
          "Tensor name %s isn't added to Qnn Graph",
          tensor_wrapper->GetName().c_str());
    }

    for (const auto& tensor_wrapper : op_wrapper->GetOutputTensors()) {
      ET_CHECK_OR_RETURN_ERROR(
          backend_params_ptr_->qnn_graph_ptr_->EnsureTensorInQnnGraph(
              graph_name, tensor_wrapper) == Error::Ok,
          Internal,
          "Tensor name %s isn't added to Qnn Graph",
          tensor_wrapper->GetName().c_str());
    }

    for (const auto& param : op_wrapper->GetParams()) {
      auto* p_tensor_param = dynamic_cast<TensorParamWrapper*>(param.get());
      if (p_tensor_param != nullptr) {
        ET_CHECK_OR_RETURN_ERROR(
            backend_params_ptr_->qnn_graph_ptr_->EnsureTensorInQnnGraph(
                graph_name, p_tensor_param->GetTensorWrapper()) == Error::Ok,
            Internal,
            "Param tensor name %s isn't added to Qnn Graph",
            p_tensor_param->GetName().c_str());
      }
      ET_CHECK_OR_RETURN_ERROR(
          param->PopulateQnnParam() == Error::Ok,
          Internal,
          "Failed to populate Qnn param");
    }

    error = backend_params_ptr_->qnn_graph_ptr_->GraphAddNode(
        graph_name, op_wrapper->GetOpConfig());
    if (error != QNN_SUCCESS) {
      QNN_EXECUTORCH_LOG_ERROR(
          "Failed to add node to Qnn Graph with error: %d",
          QNN_GET_ERROR_CODE(error));
      return Error::Internal;
    }
  }

  error = backend_params_ptr_->qnn_graph_ptr_->GraphFinalize(graph_name);
  if (error != QNN_SUCCESS) {
    QNN_EXECUTORCH_LOG_ERROR(
        "Failed to finalize Qnn Graph with error: %d",
        QNN_GET_ERROR_CODE(error));
    return Error::Internal;
  }

  return Error::Ok;
}

std::string QnnManager::GetBinarySignature() {
  flatbuffers::Verifier verifier(
      static_cast<const uint8_t* const>(qnn_context_blob_.buffer),
      qnn_context_blob_.nbytes);
  return VerifyBinaryInfoBuffer(verifier)
      ? GetBinaryInfo(qnn_context_blob_.buffer)->signature()->str()
      : "";
}

} // namespace qnn
} // namespace backends
} // namespace executorch

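// Public C API for shared (custom) buffers. Minimal usage sketch, assuming an
// 8 KiB buffer with 64-byte alignment (sizes are arbitrary and tensor_addr
// would come from the caller's tensor layout):
//
//   void* custom_mem = QnnExecuTorchAllocCustomMem(8192, 64);
//   QnnExecuTorchAddCustomMemTensorAddr(tensor_addr, custom_mem);
//   // ... run inference; the runtime registers the memory with QNN ...
//   QnnExecuTorchFreeCustomMem(custom_mem);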
void* QnnExecuTorchAllocCustomMem(size_t bytes, size_t alignment) {
  void* buffer_ptr =
      executorch::backends::qnn::SharedBuffer::GetSharedBufferManager()
          .AllocMem(bytes, alignment);
  return buffer_ptr;
}

void QnnExecuTorchFreeCustomMem(void* buffer_ptr) {
  executorch::backends::qnn::SharedBuffer::GetSharedBufferManager().FreeMem(
      buffer_ptr);
}

void QnnExecuTorchAddCustomMemTensorAddr(void* tensor_addr, void* custom_mem) {
  executorch::backends::qnn::SharedBuffer::GetSharedBufferManager()
      .AddCusomMemTensorAddr(tensor_addr, custom_mem);
}

void QnnExecuTorchAddCustomMemTensorInfo(const CustomMemTensorInfo& info) {
  executorch::backends::qnn::SharedBuffer::GetSharedBufferManager()
      .AddCusomMemTensorInfo(info);
}
720