/*
 * Copyright (c) Qualcomm Innovation Center, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */
#pragma once

#include <executorch/backends/qualcomm/aot/wrappers/OpWrapper.h>
#include <executorch/backends/qualcomm/aot/wrappers/TensorWrapper.h>
#include <executorch/backends/qualcomm/qc_compiler_spec_generated.h>
#include <executorch/backends/qualcomm/runtime/Logging.h>
#include <executorch/backends/qualcomm/runtime/QnnExecuTorch.h>
#include <executorch/backends/qualcomm/runtime/backends/QnnBackendFactory.h>
#include <executorch/runtime/core/error.h>

#include <memory>
#include <unordered_map>

namespace executorch {
namespace backends {
namespace qnn {
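// A minimal usage sketch, based only on the declarations in this header; the
// authoritative call sequence lives in the backend implementation (e.g. the
// QnnExecuTorchBackend delegate), and graph names come from the compiled
// context binary:
//
//   QnnManager manager(options, context_binary);
//   if (manager.Init() != executorch::runtime::Error::Ok) { /* handle */ }
//   for (const std::string& graph_name : manager.GetGraphNames()) {
//     manager.AllocateTensor(graph_name);
//   }
//   // ... populate input_tensor_structs, then:
//   manager.Execute(graph_name, input_tensor_structs, output_tensor_structs,
//                   event_tracer);
//   manager.Destroy();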
class QnnManager {
 public:
  // Construct QnnManager
  explicit QnnManager(
      const QnnExecuTorchOptions* options,
      const QnnExecuTorchContextBinary& qnn_executorch_context_binary);

  ~QnnManager();
  executorch::runtime::Error Init();

  // Allocate I/O tensor wrappers for the named graph; the second overload
  // uses caller-provided wrappers instead.
  executorch::runtime::Error AllocateTensor(const std::string& graph_name);
  executorch::runtime::Error AllocateTensor(
      const std::string& graph_name,
      std::vector<std::shared_ptr<TensorWrapper>>& inputs,
      std::vector<std::shared_ptr<TensorWrapper>>& outputs);

  // Run the named graph on the given input/output tensor structs.
  executorch::runtime::Error Execute(
      const std::string& graph_name,
      const std::vector<Qnn_Tensor_t>& input_tensor_structs,
      std::vector<Qnn_Tensor_t>& output_tensor_structs,
      executorch::runtime::EventTracer* event_tracer);

  // Report QNN profiling data for the named graph through the event tracer.
  executorch::runtime::Error ProfileExecuteData(
      const std::string& graph_name,
      executorch::runtime::EventTracer* event_tracer);

  void Destroy();

  bool IsAvailable() {
    return true;
  }

  bool IsOnlinePrepare() {
    return options_->online_prepare();
  }

  bool IsMultipleGraphs() {
    return options_->multiple_graphs();
  }

  bool IsTensorDump() {
    return options_->dump_intermediate_outputs();
  }

  bool IsNodeSupportedByBackend(
      std::vector<std::shared_ptr<OpWrapper>>& op_wrappers);

  executorch::runtime::Error GetContextBinary(
      QnnExecuTorchContextBinary& qnn_executorch_context_binary);

  executorch::runtime::Error CompileQcir();

  executorch::runtime::Error Compile(
      const std::string& graph_name,
      std::vector<std::shared_ptr<OpWrapper>>& op_wrappers);

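  // Register an externally allocated buffer for the given tensor with the QNN
  // backend. Whether the ION path or the custom shared-buffer path is taken
  // is an implementation detail (see RegisterIonMem / RegisterCustomMem
  // below).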
  executorch::runtime::Error RegisterMem(
      void* data_ptr,
      const std::shared_ptr<TensorWrapper>& tensor_wrapper);

  // Pre-register custom memory handle from the SharedBuffer before execution
  executorch::runtime::Error PreRegisterMem();

  uint64_t GetSpillFillBufferSize() {
    auto* htp_backend_cache_ptr = static_cast<HtpBackendCache*>(
        backend_params_ptr_->qnn_backend_cache_ptr_.get());
    return htp_backend_cache_ptr->GetSpillFillBufferSize();
  }

  std::vector<std::shared_ptr<TensorWrapper>> GetGraphInputs(
      const std::string& graph_name) {
    return !input_tensors_.count(graph_name)
        ? std::vector<std::shared_ptr<TensorWrapper>>()
        : input_tensors_[graph_name];
  }

  std::vector<std::shared_ptr<TensorWrapper>> GetGraphOutputs(
      const std::string& graph_name) {
    return !output_tensors_.count(graph_name)
        ? std::vector<std::shared_ptr<TensorWrapper>>()
        : output_tensors_[graph_name];
  }

  std::vector<std::string> GetGraphNames() {
    return backend_params_ptr_->qnn_context_ptr_->GetGraphNames();
  }
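
  // Illustrative sketch (hypothetical caller code, not part of this header)
  // of the per-graph accessors above; empty vectors are returned for graph
  // names without allocated tensors:
  //
  //   for (const std::string& name : manager.GetGraphNames()) {
  //     auto inputs = manager.GetGraphInputs(name);
  //     auto outputs = manager.GetGraphOutputs(name);
  //   }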

  std::string GetBinarySignature();

 private:
  executorch::runtime::Error LoadQnnLibrary();

  static constexpr const char* htp_library_name_ = "libQnnHtp.so";
  static constexpr const char* gpu_library_name_ = "libQnnGpu.so";
  static constexpr const char* dsp_library_name_ = "libQnnDsp.so";

  QnnExecuTorchContextBinary qnn_context_blob_;
  std::unique_ptr<BackendConfigParameters> backend_params_ptr_;
  QnnImplementation qnn_loaded_backend_;
  std::unique_ptr<QnnLogger> logger_;
  const QnnExecuTorchOptions* options_;
  std::unordered_map<std::string, std::vector<std::shared_ptr<TensorWrapper>>>
      input_tensors_;
  std::unordered_map<std::string, std::vector<std::shared_ptr<TensorWrapper>>>
      output_tensors_;
  executorch::runtime::Error RegisterIonMem(
      void* data_ptr,
      const std::shared_ptr<TensorWrapper>& tensor_wrapper);
  executorch::runtime::Error RegisterCustomMem(
      void* data_ptr,
      void* custom_mem_base,
      const std::shared_ptr<TensorWrapper>& tensor_wrapper);
  // Mapping from QNN tensor data types to their ExecuTorch scalar types.
  std::unordered_map<Qnn_DataType_t, executorch::aten::ScalarType>
      qnn_dtype_to_scalar_type_ = {
          {Qnn_DataType_t::QNN_DATATYPE_INT_32,
           executorch::aten::ScalarType::Int},
          {Qnn_DataType_t::QNN_DATATYPE_FLOAT_32,
           executorch::aten::ScalarType::Float},
          {Qnn_DataType_t::QNN_DATATYPE_SFIXED_POINT_8,
           executorch::aten::ScalarType::Char},
          {Qnn_DataType_t::QNN_DATATYPE_SFIXED_POINT_16,
           executorch::aten::ScalarType::Short},
          {Qnn_DataType_t::QNN_DATATYPE_UFIXED_POINT_8,
           executorch::aten::ScalarType::Byte},
          {Qnn_DataType_t::QNN_DATATYPE_UFIXED_POINT_16,
           executorch::aten::ScalarType::Bits16},
  };
};
} // namespace qnn
} // namespace backends
} // namespace executorch