/*
 * Copyright (c) Qualcomm Innovation Center, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */
#pragma once
#include <executorch/backends/qualcomm/aot/ir/qcir_utils.h>
#include <executorch/backends/qualcomm/aot/python/PyQnnWrapperAdaptor.h>
#include <executorch/backends/qualcomm/qc_binary_info_generated.h>
#include <executorch/backends/qualcomm/qc_compiler_spec_generated.h>
#include <executorch/backends/qualcomm/runtime/Logging.h>
#include <executorch/backends/qualcomm/runtime/QnnExecuTorch.h>
#include <executorch/backends/qualcomm/runtime/QnnManager.h>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <chrono>
#include <cstring>
#include <memory>
#include <string>
#include <string_view>
#include <unordered_map>
#include <vector>

namespace py = pybind11;
namespace executorch {
namespace backends {
namespace qnn {
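// Thin pybind-facing facade over QnnManager. A rough usage sketch from the
// Python side (the binding and attribute names here are assumptions, not
// defined in this header):
//
//   manager = PyQnnManager(compiler_spec_bytes)  # AoT compilation path
//   manager.Init()
//   blob = manager.Compile(graph_name, op_wrappers)
//   manager.Destroy()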
class PyQnnManager {
 public:
  // used for AoT compilation
  explicit PyQnnManager(const py::bytes& buffer)
      : qnn_executorch_option_ptr_(buffer),
        qnn_executorch_context_binary_(QNN_EXECUTORCH_CONTEXT_BINARY) {
    // Parse the options through a non-owning, non-allocating string_view
    // so the underlying bytes are not copied.
    auto qnn_executorch_options = GetQnnExecuTorchOptions(
        qnn_executorch_option_ptr_.cast<std::string_view>().data());
    qnn_manager_ = std::make_shared<QnnManager>(
        qnn_executorch_options, qnn_executorch_context_binary_);
  }

  // used for loading context binary directly
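  // Note: the context binary below borrows ctx_bin's buffer rather than
  // copying it, so the Python caller is expected to keep ctx_bin alive for
  // as long as this manager uses the binary.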
  explicit PyQnnManager(const py::bytes& buffer, const py::bytes& ctx_bin)
      : qnn_executorch_option_ptr_(buffer) {
    auto qnn_executorch_options = GetQnnExecuTorchOptions(
        qnn_executorch_option_ptr_.cast<std::string_view>().data());

    py::buffer_info info(py::buffer(ctx_bin).request());
    qnn_executorch_context_binary_.buffer = info.ptr;
    qnn_executorch_context_binary_.nbytes = info.size * info.itemsize;
    qnn_manager_ = std::make_shared<QnnManager>(
        qnn_executorch_options, qnn_executorch_context_binary_);
  }

  // used for loading multiple graphs in qcir
  explicit PyQnnManager(const py::bytes& buffer, const py::list& qcirs)
      : qnn_executorch_option_ptr_(buffer) {
    auto qnn_executorch_options = GetQnnExecuTorchOptions(
        qnn_executorch_option_ptr_.cast<std::string_view>().data());

    // merge multiple qcirs into one context with multiple graphs
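    // Each element of `qcirs` is a BinaryInfo flatbuffer whose data() payload
    // is itself a serialized qcir::Context; both layers are verified before
    // their graphs are copied into this builder.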
    std::vector<flatbuffers::Offset<qcir::Graph>> graphs;
    for (size_t i = 0; i < qcirs.size(); ++i) {
      py::buffer_info info(py::buffer(qcirs[i].cast<py::bytes>()).request());
      flatbuffers::Verifier verifier_binary_info(
          static_cast<const uint8_t*>(info.ptr),
          info.size * info.itemsize);
      if (!qnn_delegate::VerifyBinaryInfoBuffer(verifier_binary_info)) {
        QNN_EXECUTORCH_LOG_ERROR("Failed to verify binary info");
        return;
      }
      auto binary_info = qnn_delegate::GetBinaryInfo(info.ptr);

      flatbuffers::Verifier verifier_qcir(
          binary_info->data()->data(), binary_info->data()->size());
      if (!qcir::VerifyContextBuffer(verifier_qcir)) {
        QNN_EXECUTORCH_LOG_ERROR("Failed to verify qcir format");
        return;
      }
      auto context = qcir::GetContext(binary_info->data()->data());
      for (const auto& graph : *context->graphs()) {
        std::vector<flatbuffers::Offset<qcir::Tensor>> tensors;
        for (const auto tensor : *graph->tensors()) {
          // here we need to take a detour to merge multiple qcir flatbuffers
          // outer ToTensor
          //   return: flatbuffers::Offset<Tensor>
          //   consume: QnnTensor, flatbuffers::FlatBufferBuilder*
          // inner ToTensor
          //   return: QnnTensor
          //   consume: flatbuffers::Vector<::flatbuffers::Offset<qcir::Tensor>>
          tensors.emplace_back(ToTensor(ToTensor(tensor), &builder_));
        }
        std::vector<flatbuffers::Offset<qcir::Operator>> nodes;
        for (const auto& node : *graph->nodes()) {
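          // Copy the index lists out of the source flatbuffer;
          // CreateOperatorDirect takes std::vector pointers, not
          // flatbuffers::Vector handles.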
          int32_t* inputs_ptr = const_cast<int32_t*>(node->inputs()->data());
          int32_t* outputs_ptr = const_cast<int32_t*>(node->outputs()->data());
          int32_t* params_ptr = const_cast<int32_t*>(node->params()->data());
          std::vector<int32_t> inputs(
              inputs_ptr, inputs_ptr + node->inputs()->size());
          std::vector<int32_t> outputs(
              outputs_ptr, outputs_ptr + node->outputs()->size());
          std::vector<int32_t> params(
              params_ptr, params_ptr + node->params()->size());
          nodes.emplace_back(qcir::CreateOperatorDirect(
              builder_,
              node->name()->str().c_str(),
              node->package_name()->str().c_str(),
              node->type_name()->str().c_str(),
              &inputs,
              &outputs,
              &params));
        }
        graphs.emplace_back(qcir::CreateGraphDirect(
            builder_, graph->name()->str().c_str(), &nodes, &tensors));
      }
    }

    auto context = qcir::CreateContextDirect(builder_, &graphs);
    builder_.Finish(context);
    QnnExecuTorchContextBinary qcir_bin(
        {builder_.GetBufferPointer(), builder_.GetSize()});

    qnn_executorch_context_binary_ = MakeBinaryInfo(qcir_bin);
    qnn_manager_ = std::make_shared<QnnManager>(
        qnn_executorch_options, qnn_executorch_context_binary_);
  }

  executorch::runtime::Error Init() {
    return qnn_manager_->Init();
  }

  bool IsNodeSupportedByBackend(
      std::vector<std::shared_ptr<OpWrapper>>& op_wrappers) {
    return qnn_manager_->IsNodeSupportedByBackend(op_wrappers);
  }

  // This overload is specific to multi-graph compilation.
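  // It compiles the qcir context assembled by the multi-graph constructor
  // above and returns the resulting context binary as a char array.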
  py::array_t<char> Compile() {
    if (qnn_manager_->CompileQcir() != Error::Ok) {
      QNN_EXECUTORCH_LOG_ERROR("Failed to compile qcir");
      return py::array_t<char>(0);
    }

    // generate context binary if compilation succeeded
    QnnExecuTorchContextBinary binary_info;
    qnn_manager_->GetContextBinary(binary_info);
    // allocate py::array (to pass the result of the C++ function to Python)
    auto result = py::array_t<char>(binary_info.nbytes);
    auto result_buffer = result.request();
    char* result_ptr = static_cast<char*>(result_buffer.ptr);
    std::memcpy(result_ptr, binary_info.buffer, binary_info.nbytes);
    return result;
  }

  py::array_t<char> Compile(
      const std::string& graph_name,
      std::vector<std::shared_ptr<OpWrapper>>& op_wrappers) {
    QnnExecuTorchContextBinary binary_info;

    if (qnn_manager_->IsOnlinePrepare() || qnn_manager_->IsMultipleGraphs()) {
      builder_.Reset();
      std::vector<flatbuffers::Offset<qcir::Tensor>> tensors;
      std::unordered_map<void*, int> tensor_map;

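      // Deduplicate tensors shared between operators: remember the index
      // assigned to each TensorWrapper on first use and reuse it afterwards.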
      auto set_tensor = [&](const std::shared_ptr<TensorWrapper>& wrapper,
                            std::vector<int>& index) {
        auto it = tensor_map.find(wrapper.get());
        if (it != tensor_map.end()) {
          index.push_back(it->second);
        } else {
          int i = tensors.size();
          tensor_map[wrapper.get()] = i;
          index.push_back(i);
          tensors.emplace_back(
              ToTensor(wrapper->CloneTensorStruct(), &builder_));
        }
      };

      std::vector<flatbuffers::Offset<qcir::Operator>> operators;
      for (std::shared_ptr<OpWrapper>& op_wrapper : op_wrappers) {
        std::vector<int> inputs, outputs, params;

        for (const auto& tensor_wrapper : op_wrapper->GetInputTensors()) {
          set_tensor(tensor_wrapper, inputs);
        }

        for (const auto& tensor_wrapper : op_wrapper->GetOutputTensors()) {
          set_tensor(tensor_wrapper, outputs);
        }

        for (const auto& param : op_wrapper->GetParams()) {
          auto* p_tensor_param = dynamic_cast<TensorParamWrapper*>(param.get());
          if (p_tensor_param != nullptr) {
            auto wrapper = p_tensor_param->GetTensorWrapper();
            wrapper->SetName(param->GetName());
            set_tensor(wrapper, params);
          } else {
            executorch::runtime::Error err = param->PopulateQnnParam();
            if (err != executorch::runtime::Error::Ok) {
              QNN_EXECUTORCH_LOG_ERROR(
                  "Failed to get scalar parameter in online prepare stage");
              return py::array_t<char>(0);
            }
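            // Wrap the scalar parameter in a synthetic Qnn_Tensor_t so that
            // scalars and tensors can be serialized into qcir uniformly.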
            Qnn_Param_t p = param->GetQnnParam();
            Qnn_Tensor_t t = QNN_TENSOR_INIT;
            QNN_VER_PTR(t)->name = p.name;
            QNN_VER_PTR(t)->dataType = p.scalarParam.dataType;
            QNN_VER_PTR(t)->clientBuf.data =
                static_cast<void*>(&p.scalarParam.uint8Value);
            QNN_VER_PTR(t)->clientBuf.dataSize =
                GetDataTypeSize(QNN_VER_PTR(t)->dataType);
            params.push_back(tensors.size());
            tensors.emplace_back(ToTensor(t, &builder_));
          }
        }

        Qnn_OpConfig_t op_config = op_wrapper->GetOpConfig();
        operators.emplace_back(qcir::CreateOperatorDirect(
            builder_,
            QNN_VER_PTR(op_config)->name,
            QNN_VER_PTR(op_config)->packageName,
            QNN_VER_PTR(op_config)->typeName,
            &inputs,
            &outputs,
            &params));
      }
      auto graph = qcir::CreateGraphDirect(
          builder_, graph_name.c_str(), &operators, &tensors);
      std::vector<flatbuffers::Offset<qcir::Graph>> graphs({graph});
      auto context = qcir::CreateContextDirect(builder_, &graphs);
      builder_.Finish(context);
      QnnExecuTorchContextBinary qcir_binary(
          {builder_.GetBufferPointer(), builder_.GetSize()});
      binary_info = MakeBinaryInfo(qcir_binary);
    } else {
      if (qnn_manager_->Compile(graph_name, op_wrappers) !=
          executorch::runtime::Error::Ok) {
        QNN_EXECUTORCH_LOG_ERROR("Failed to compile QNN graph");
        return py::array_t<char>(0);
      }
      if (qnn_manager_->GetContextBinary(binary_info) !=
          executorch::runtime::Error::Ok) {
        return py::array_t<char>(0);
      }
    }

    // allocate py::array (to pass the result of the C++ function to Python)
    auto result = py::array_t<char>(binary_info.nbytes);
    auto result_buffer = result.request();
    char* result_ptr = static_cast<char*>(result_buffer.ptr);
    std::memcpy(result_ptr, binary_info.buffer, binary_info.nbytes);
    return result;
  }

  void Destroy() {
    return qnn_manager_->Destroy();
  }

  bool IsAvailable() {
    return qnn_manager_->IsAvailable();
  }

  bool IsTensorDump() {
    return qnn_manager_->IsTensorDump();
  }

  executorch::runtime::Error AllocateTensor(const std::string& graph_name) {
    return qnn_manager_->AllocateTensor(graph_name);
  }

  py::list GetGraphInputs(const std::string& graph_name) {
    py::list ret;
    for (const std::shared_ptr<TensorWrapper>& input :
         qnn_manager_->GetGraphInputs(graph_name)) {
      ret.append(PyQnnTensorWrapper(input));
    }
    return ret;
  }

  py::list GetGraphOutputs(const std::string& graph_name) {
    py::list ret;
    for (const std::shared_ptr<TensorWrapper>& output :
         qnn_manager_->GetGraphOutputs(graph_name)) {
      ret.append(PyQnnTensorWrapper(output));
    }
    return ret;
  }

  py::list GetGraphNames() {
    py::list ret;
    for (const std::string& graph_name : qnn_manager_->GetGraphNames()) {
      ret.append(graph_name);
    }
    return ret;
  }

  uint64_t GetSpillFillBufferSize() {
    return qnn_manager_->GetSpillFillBufferSize();
  }

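  // Wraps raw context-binary bytes in a BinaryInfo flatbuffer and returns
  // the result to Python as a char array.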
  py::array_t<char> MakeBinaryInfo(const py::bytes& ctx_bin) {
    py::buffer_info info(py::buffer(ctx_bin).request());
    QnnExecuTorchContextBinary binary(
        {info.ptr, static_cast<uint64_t>(info.size * info.itemsize)});
    auto binary_info = MakeBinaryInfo(binary);
    auto result = py::array_t<char>(binary_info.nbytes);
    auto result_buffer = result.request();
    std::memcpy(result_buffer.ptr, binary_info.buffer, binary_info.nbytes);
    return result;
  }

 private:
  QnnExecuTorchContextBinary MakeBinaryInfo(
      const QnnExecuTorchContextBinary& ctx_bin) {
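    // The signature is the current high-resolution-clock timestamp, used to
    // tag this binary so the runtime can distinguish cached artifacts.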
    auto signature = []() {
      return std::to_string(
          std::chrono::high_resolution_clock::now().time_since_epoch().count());
    };
    const uint8_t* base = static_cast<const uint8_t*>(ctx_bin.buffer);
    std::vector<uint8_t> data(base, base + ctx_bin.nbytes);
    // add signature to binary for cache reuse in runtime
    builder_.Reset();
    auto binary_info = qnn_delegate::CreateBinaryInfoDirect(
        builder_, signature().c_str(), &data);
    builder_.Finish(binary_info);

    return QnnExecuTorchContextBinary(
        {builder_.GetBufferPointer(), builder_.GetSize()});
  }

  // Store the bytes object instead of a raw pointer so that this module will
  // keep the bytes alive.
  const py::bytes qnn_executorch_option_ptr_;
  QnnExecuTorchContextBinary qnn_executorch_context_binary_;
  std::shared_ptr<QnnManager> qnn_manager_;
  flatbuffers::FlatBufferBuilder builder_;
};
} // namespace qnn
} // namespace backends
} // namespace executorch