/*
 * Copyright (c) Qualcomm Innovation Center, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */
#pragma once
#include <executorch/backends/qualcomm/aot/ir/qcir_utils.h>
#include <executorch/backends/qualcomm/aot/python/PyQnnWrapperAdaptor.h>
#include <executorch/backends/qualcomm/qc_binary_info_generated.h>
#include <executorch/backends/qualcomm/qc_compiler_spec_generated.h>
#include <executorch/backends/qualcomm/runtime/Logging.h>
#include <executorch/backends/qualcomm/runtime/QnnExecuTorch.h>
#include <executorch/backends/qualcomm/runtime/QnnManager.h>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <chrono>
#include <cstdint>
#include <cstring>
#include <memory>
#include <string>
#include <string_view>
#include <unordered_map>
#include <vector>

namespace py = pybind11;
namespace executorch {
namespace backends {
namespace qnn {
class PyQnnManager {
 public:
  // used for AoT compilation
  explicit PyQnnManager(const py::bytes& buffer)
      : qnn_executorch_option_ptr_(buffer),
        qnn_executorch_context_binary_(QNN_EXECUTORCH_CONTEXT_BINARY) {
    // Expose the options buffer as a non-owning string_view so the parser
    // reads it in place without copying.
    auto qnn_executorch_options = GetQnnExecuTorchOptions(
        qnn_executorch_option_ptr_.cast<std::string_view>().data());
    qnn_manager_ = std::make_shared<QnnManager>(
        qnn_executorch_options, qnn_executorch_context_binary_);
  }

  // used for loading context binary directly
  explicit PyQnnManager(const py::bytes& buffer, const py::bytes& ctx_bin)
      : qnn_executorch_option_ptr_(buffer) {
    auto qnn_executorch_options = GetQnnExecuTorchOptions(
        qnn_executorch_option_ptr_.cast<std::string_view>().data());

    py::buffer_info info(py::buffer(ctx_bin).request());
    qnn_executorch_context_binary_.buffer = info.ptr;
    qnn_executorch_context_binary_.nbytes = info.size * info.itemsize;
    qnn_manager_ = std::make_shared<QnnManager>(
        qnn_executorch_options, qnn_executorch_context_binary_);
  }
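
  // Layout of each element in the `qcirs` list consumed by the constructor
  // below, as implied by the two verification steps in its body (a sketch,
  // not a schema reference):
  //
  //   BinaryInfo { signature: string, data: [ubyte] }
  //                                   `data` holds a serialized qcir
  //                                   Context { graphs: [Graph] }
  //
  // Every graph from every payload is re-serialized into a single Context,
  // so one QnnManager instance can own multiple graphs.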

  // used for loading multiple graphs in qcir
  explicit PyQnnManager(const py::bytes& buffer, const py::list& qcirs)
      : qnn_executorch_option_ptr_(buffer) {
    auto qnn_executorch_options = GetQnnExecuTorchOptions(
        qnn_executorch_option_ptr_.cast<std::string_view>().data());

    // merge multiple qcirs into one context with multiple graphs
    std::vector<flatbuffers::Offset<qcir::Graph>> graphs;
    for (size_t i = 0; i < qcirs.size(); ++i) {
      py::buffer_info info(py::buffer(qcirs[i].cast<py::bytes>()).request());
      flatbuffers::Verifier verifier_binary_info(
          static_cast<const uint8_t* const>(info.ptr),
          info.size * info.itemsize);
      if (!qnn_delegate::VerifyBinaryInfoBuffer(verifier_binary_info)) {
        QNN_EXECUTORCH_LOG_ERROR("Fail to verify binary info");
        return;
      }
      auto binary_info = qnn_delegate::GetBinaryInfo(info.ptr);

      flatbuffers::Verifier verifier_qcir(
          binary_info->data()->data(), binary_info->data()->size());
      if (!qcir::VerifyContextBuffer(verifier_qcir)) {
        QNN_EXECUTORCH_LOG_ERROR("Fail to verify qcir format");
        return;
      }
      auto context = qcir::GetContext(binary_info->data()->data());
      for (const auto& graph : *context->graphs()) {
        std::vector<flatbuffers::Offset<qcir::Tensor>> tensors;
        for (const auto tensor : *graph->tensors()) {
          // here we need to take a detour to merge multiple qcir flatbuffers
          // outer ToTensor
          //   return: flatbuffers::Offset<Tensor>
          //   consume: QnnTensor, flatbuffers::FlatBufferBuilder*
          // inner ToTensor
          //   return: QnnTensor
          //   consume: flatbuffers::Vector<::flatbuffers::Offset<qcir::Tensor>>
          tensors.emplace_back(ToTensor(ToTensor(tensor), &builder_));
        }
        std::vector<flatbuffers::Offset<qcir::Operator>> nodes;
        for (const auto& node : *graph->nodes()) {
          int32_t* inputs_ptr = const_cast<int32_t*>(node->inputs()->data());
          int32_t* outputs_ptr = const_cast<int32_t*>(node->outputs()->data());
          int32_t* params_ptr = const_cast<int32_t*>(node->params()->data());
          std::vector<int32_t> inputs(
              inputs_ptr, inputs_ptr + node->inputs()->size());
          std::vector<int32_t> outputs(
              outputs_ptr, outputs_ptr + node->outputs()->size());
          std::vector<int32_t> params(
              params_ptr, params_ptr + node->params()->size());
          nodes.emplace_back(qcir::CreateOperatorDirect(
              builder_,
              node->name()->str().c_str(),
              node->package_name()->str().c_str(),
              node->type_name()->str().c_str(),
              &inputs,
              &outputs,
              &params));
        }
        graphs.emplace_back(qcir::CreateGraphDirect(
            builder_, graph->name()->str().c_str(), &nodes, &tensors));
      }
    }

    auto context = qcir::CreateContextDirect(builder_, &graphs);
    builder_.Finish(context);
    QnnExecuTorchContextBinary qcir_bin(
        {builder_.GetBufferPointer(), builder_.GetSize()});

    qnn_executorch_context_binary_ = MakeBinaryInfo(qcir_bin);
    qnn_manager_ = std::make_shared<QnnManager>(
        qnn_executorch_options, qnn_executorch_context_binary_);
  }

  executorch::runtime::Error Init() {
    return qnn_manager_->Init();
  }

  bool IsNodeSupportedByBackend(
      std::vector<std::shared_ptr<OpWrapper>>& op_wrappers) {
    return qnn_manager_->IsNodeSupportedByBackend(op_wrappers);
  }

  // this method is specific for compiling multi-graphs
  py::array_t<char> Compile() {
    if (qnn_manager_->CompileQcir() != Error::Ok) {
      QNN_EXECUTORCH_LOG_ERROR("Fail to compile qcir");
      return py::array_t<char>(0);
    }

    // generate context binary if compilation succeeded
    QnnExecuTorchContextBinary binary_info;
    qnn_manager_->GetContextBinary(binary_info);
    // allocate py::array (to pass the result of the C++ function to Python)
    auto result = py::array_t<char>(binary_info.nbytes);
    auto result_buffer = result.request();
    char* result_ptr = (char*)result_buffer.ptr;
    std::memcpy(result_ptr, binary_info.buffer, binary_info.nbytes);
    return result;
  }
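
  // Compiles a single graph from op wrappers. Two paths exist: in online
  // prepare or multi-graph mode the wrappers are serialized into a qcir
  // flatbuffer (deferring real QNN compilation to a later stage); otherwise
  // QnnManager compiles immediately and the QNN context binary is returned.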
  py::array_t<char> Compile(
      const std::string& graph_name,
      std::vector<std::shared_ptr<OpWrapper>>& op_wrappers) {
    QnnExecuTorchContextBinary binary_info;

    if (qnn_manager_->IsOnlinePrepare() || qnn_manager_->IsMultipleGraphs()) {
      builder_.Reset();
      std::vector<flatbuffers::Offset<qcir::Tensor>> tensors;
      std::unordered_map<void*, int> tensor_map;

      // serialize each tensor once and reuse its index on later references
      auto set_tensor = [&](const std::shared_ptr<TensorWrapper>& wrapper,
                            std::vector<int>& index) {
        auto it = tensor_map.find(wrapper.get());
        if (it != tensor_map.end()) {
          index.push_back(it->second);
        } else {
          int i = tensors.size();
          tensor_map[wrapper.get()] = i;
          index.push_back(i);
          tensors.emplace_back(
              ToTensor(wrapper->CloneTensorStruct(), &builder_));
        }
      };

      std::vector<flatbuffers::Offset<qcir::Operator>> operators;
      for (std::shared_ptr<OpWrapper>& op_wrapper : op_wrappers) {
        std::vector<int> inputs, outputs, params;

        for (const auto& tensor_wrapper : op_wrapper->GetInputTensors()) {
          set_tensor(tensor_wrapper, inputs);
        }

        for (const auto& tensor_wrapper : op_wrapper->GetOutputTensors()) {
          set_tensor(tensor_wrapper, outputs);
        }

        for (const auto& param : op_wrapper->GetParams()) {
          auto* p_tensor_param = dynamic_cast<TensorParamWrapper*>(param.get());
          if (p_tensor_param != nullptr) {
            auto wrapper = p_tensor_param->GetTensorWrapper();
            wrapper->SetName(param->GetName());
            set_tensor(wrapper, params);
          } else {
            executorch::runtime::Error err = param->PopulateQnnParam();
            if (err != executorch::runtime::Error::Ok) {
              QNN_EXECUTORCH_LOG_ERROR(
                  "Fail to get scalar parameter in online prepare stage");
              return py::array_t<char>(0);
            }
            // pack the scalar parameter into a single-element tensor so it
            // survives qcir serialization alongside tensor parameters
            Qnn_Param_t p = param->GetQnnParam();
            Qnn_Tensor_t t = QNN_TENSOR_INIT;
            QNN_VER_PTR(t)->name = p.name;
            QNN_VER_PTR(t)->dataType = p.scalarParam.dataType;
            QNN_VER_PTR(t)->clientBuf.data =
                static_cast<void*>(&p.scalarParam.uint8Value);
            QNN_VER_PTR(t)->clientBuf.dataSize =
                GetDataTypeSize(QNN_VER_PTR(t)->dataType);
            params.push_back(tensors.size());
            tensors.emplace_back(ToTensor(t, &builder_));
          }
        }

        Qnn_OpConfig_t op_config = op_wrapper->GetOpConfig();
        operators.emplace_back(qcir::CreateOperatorDirect(
            builder_,
            QNN_VER_PTR(op_config)->name,
            QNN_VER_PTR(op_config)->packageName,
            QNN_VER_PTR(op_config)->typeName,
            &inputs,
            &outputs,
            &params));
      }
      auto graph = qcir::CreateGraphDirect(
          builder_, graph_name.c_str(), &operators, &tensors);
      std::vector<flatbuffers::Offset<qcir::Graph>> graphs({graph});
      auto context = qcir::CreateContextDirect(builder_, &graphs);
      builder_.Finish(context);
      QnnExecuTorchContextBinary qcir_binary(
          {builder_.GetBufferPointer(), builder_.GetSize()});
      binary_info = MakeBinaryInfo(qcir_binary);
    } else {
      if (qnn_manager_->Compile(graph_name, op_wrappers) !=
          executorch::runtime::Error::Ok) {
        QNN_EXECUTORCH_LOG_ERROR("Fail to compile QNN graph");
        return py::array_t<char>(0);
      }
      if (qnn_manager_->GetContextBinary(binary_info) !=
          executorch::runtime::Error::Ok) {
        return py::array_t<char>(0);
      }
    }

    // allocate py::array (to pass the result of the C++ function to Python)
    auto result = py::array_t<char>(binary_info.nbytes);
    auto result_buffer = result.request();
    char* result_ptr = (char*)result_buffer.ptr;
    std::memcpy(result_ptr, binary_info.buffer, binary_info.nbytes);
    return result;
  }
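
  // Typical AoT call sequence from the Python side (an illustrative sketch,
  // not enforced by this class):
  //
  //   manager.Init();
  //   manager.IsNodeSupportedByBackend(op_wrappers);   // partition check
  //   bin = manager.Compile(graph_name, op_wrappers);  // context binary bytes
  //   manager.Destroy();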
  void Destroy() {
    return qnn_manager_->Destroy();
  }

  bool IsAvailable() {
    return qnn_manager_->IsAvailable();
  }

  bool IsTensorDump() {
    return qnn_manager_->IsTensorDump();
  }

  executorch::runtime::Error AllocateTensor(const std::string& graph_name) {
    return qnn_manager_->AllocateTensor(graph_name);
  }

  py::list GetGraphInputs(const std::string& graph_name) {
    py::list ret;
    for (const std::shared_ptr<TensorWrapper>& input :
         qnn_manager_->GetGraphInputs(graph_name)) {
      ret.append(PyQnnTensorWrapper(input));
    }
    return ret;
  }

  py::list GetGraphOutputs(const std::string& graph_name) {
    py::list ret;
    for (const std::shared_ptr<TensorWrapper>& output :
         qnn_manager_->GetGraphOutputs(graph_name)) {
      ret.append(PyQnnTensorWrapper(output));
    }
    return ret;
  }

  py::list GetGraphNames() {
    py::list ret;
    for (const std::string& graph_name : qnn_manager_->GetGraphNames()) {
      ret.append(graph_name);
    }
    return ret;
  }

  uint64_t GetSpillFillBufferSize() {
    return qnn_manager_->GetSpillFillBufferSize();
  }

  py::array_t<char> MakeBinaryInfo(const py::bytes& ctx_bin) {
    py::buffer_info info(py::buffer(ctx_bin).request());
    QnnExecuTorchContextBinary binary(
        {info.ptr, static_cast<uint64_t>(info.size * info.itemsize)});
    auto binary_info = MakeBinaryInfo(binary);
    auto result = py::array_t<char>(binary_info.nbytes);
    auto result_buffer = result.request();
    std::memcpy(result_buffer.ptr, binary_info.buffer, binary_info.nbytes);
    return result;
  }

 private:
  QnnExecuTorchContextBinary MakeBinaryInfo(
      const QnnExecuTorchContextBinary& ctx_bin) {
    auto signature = []() {
      return std::to_string(std::chrono::high_resolution_clock::now()
                                .time_since_epoch()
                                .count());
    };
    const uint8_t* base = static_cast<uint8_t*>(ctx_bin.buffer);
    std::vector<uint8_t> data(base, base + ctx_bin.nbytes);
    // add signature to binary for cache reuse in runtime
    builder_.Reset();
    auto binary_info = qnn_delegate::CreateBinaryInfoDirect(
        builder_, signature().c_str(), &data);
    builder_.Finish(binary_info);

    return QnnExecuTorchContextBinary(
        {builder_.GetBufferPointer(), builder_.GetSize()});
  }

  // Store the bytes object instead of a raw pointer so that this module will
  // keep the bytes alive.
  const py::bytes qnn_executorch_option_ptr_;
  QnnExecuTorchContextBinary qnn_executorch_context_binary_;
  std::shared_ptr<QnnManager> qnn_manager_;
  flatbuffers::FlatBufferBuilder builder_;
};
} // namespace qnn
} // namespace backends
} // namespace executorch
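
// Illustrative pybind11 registration for this adaptor (a sketch under the
// assumption that the bindings live in a companion .cpp; the module name
// `PyQnnManagerAdaptor` and the exposed name `QnnManager` are assumptions):
//
//   PYBIND11_MODULE(PyQnnManagerAdaptor, m) {
//     py::class_<PyQnnManager, std::shared_ptr<PyQnnManager>>(m, "QnnManager")
//         .def(py::init<const py::bytes&>())
//         .def(py::init<const py::bytes&, const py::bytes&>())
//         .def(py::init<const py::bytes&, const py::list&>())
//         .def("Init", &PyQnnManager::Init)
//         .def("IsNodeSupportedByBackend",
//              &PyQnnManager::IsNodeSupportedByBackend)
//         .def("Compile", py::overload_cast<>(&PyQnnManager::Compile))
//         .def("Compile",
//              py::overload_cast<
//                  const std::string&,
//                  std::vector<std::shared_ptr<OpWrapper>>&>(
//                  &PyQnnManager::Compile))
//         .def("Destroy", &PyQnnManager::Destroy);
//   }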