/*
 * Copyright (c) Qualcomm Innovation Center, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */
#include <executorch/backends/qualcomm/qc_compiler_spec_generated.h>
#include <executorch/backends/qualcomm/runtime/QnnExecuTorchBackend.h>
#include <executorch/backends/qualcomm/runtime/QnnManager.h>

#include <string>

namespace executorch {
namespace backends {
namespace qnn {

using namespace qnn_delegate;
using executorch::runtime::ArrayRef;
using executorch::runtime::BackendExecutionContext;
using executorch::runtime::BackendInitContext;
using executorch::runtime::CompileSpec;
using executorch::runtime::DelegateHandle;
using executorch::runtime::Error;
using executorch::runtime::EValue;
using executorch::runtime::FreeableBuffer;
using executorch::runtime::MemoryAllocator;
using executorch::runtime::Result;

// ========== Public method implementations =========================
constexpr const char* QNN_COMPILE_SPEC = "qnn_compile_spec";

Result<DelegateHandle*> QnnExecuTorchBackend::init(
    BackendInitContext& context,
    FreeableBuffer* processed,
    ArrayRef<CompileSpec> compile_specs) const {
  // convert SizedBuffer to qnn ExecuTorch option
  QnnExecuTorchContextBinary qnn_context_blob;
  const qnn_delegate::QnnExecuTorchOptions* qnn_executorch_options = nullptr;

  qnn_context_blob.buffer = const_cast<void*>(processed->data());
  qnn_context_blob.nbytes = processed->size();

  // convert CompileSpec to qnn ExecuTorch option
  for (auto& compile_spec : compile_specs) {
    if (std::strcmp(compile_spec.key, QNN_COMPILE_SPEC) == 0)
      qnn_executorch_options =
          GetQnnExecuTorchOptions(compile_spec.value.buffer);
    else
      QNN_EXECUTORCH_LOG_WARN("unknown argument: %s", compile_spec.key);
  }

  // Create QnnManager
  MemoryAllocator* runtime_allocator = context.get_runtime_allocator();
  QnnManager* qnn_manager =
      ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(runtime_allocator, QnnManager);

  // NOTE: Since we use placement new and since this type is not trivially
  // destructible, we must call the destructor manually in destroy().
  new (qnn_manager) QnnManager(qnn_executorch_options, qnn_context_blob);

  // TODO: this is a temporary solution for multi-graph support; it will be
  //       removed once the framework starts to accept runtime configuration.
  // ---
  // Check whether the current context binary has already been initialized,
  // and return the cached handle to reduce the memory footprint.
  std::string signature = qnn_manager->GetBinarySignature();
  auto iter = delegate_map_.find(signature);
  if (iter != delegate_map_.end()) {
    QNN_EXECUTORCH_LOG_INFO(
        "Use cached delegate handle for current method: %s",
        context.get_method_name());
    return iter->second;
  }

  ET_CHECK_OR_RETURN_ERROR(
      qnn_manager->Init() == Error::Ok,
      Internal,
      "Fail to initialize Qnn Manager");

  if (qnn_manager->IsOnlinePrepare()) {
    ET_CHECK_OR_RETURN_ERROR(
        qnn_manager->CompileQcir() == Error::Ok,
        Internal,
        "Fail to compile binary in qcir format");
  } else {
    for (const std::string& graph_name : qnn_manager->GetGraphNames()) {
      ET_CHECK_OR_RETURN_ERROR(
          qnn_manager->AllocateTensor(graph_name) == Error::Ok,
          Internal,
          "Fail to allocate tensor");
    }
  }
  add_cached_delegate(signature, qnn_manager);
  // This backend does not need its processed data after Init.
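  // NOTE: this assumes QnnManager::Init() has fully consumed the context
  // binary into QNN-owned state, so `processed` only holds the serialized
  // copy at this point and can be released to lower peak memory.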
  processed->Free();
  return qnn_manager;
}

Error QnnExecuTorchBackend::execute(
    BackendExecutionContext& context,
    DelegateHandle* handle,
    EValue** args) const {
  ET_CHECK_OR_RETURN_ERROR(
      delegate_map_rev_.count(handle) != 0,
      Internal,
      "DelegateHandle has been deleted");
  QnnManager* qnn_manager = static_cast<QnnManager*>(handle);

  std::string method_name = context.get_method_name();
  std::vector<std::shared_ptr<TensorWrapper>> input_tensors =
      qnn_manager->GetGraphInputs(method_name);
  std::vector<std::shared_ptr<TensorWrapper>> output_tensors =
      qnn_manager->GetGraphOutputs(method_name);
  std::vector<Qnn_Tensor_t> input_tensor_structs;
  std::vector<Qnn_Tensor_t> output_tensor_structs;

  input_tensor_structs.reserve(input_tensors.size());
  for (size_t i = 0; i < input_tensors.size(); ++i) {
    if (qnn_manager->RegisterMem(
            args[i]->toTensor().mutable_data_ptr(), input_tensors[i]) !=
        Error::Ok) {
      // updating the data pointer only should be fine
      input_tensors[i]->FillDataBuffer(
          args[i]->toTensor().const_data_ptr(), false /* copy_data */);
    }
    input_tensor_structs.push_back(input_tensors[i]->CloneTensorStruct());
  }

  // Graph outputs follow the inputs in the args array.
  int output_index = input_tensors.size();
  for (const auto& output_tensor : output_tensors) {
    // pos=0 limits the search to the prefix
    if (output_tensor->GetName().rfind("output_", 0) == 0) {
      void* mutable_data_ptr =
          args[output_index]->toTensor().mutable_data_ptr();
      if (qnn_manager->RegisterMem(mutable_data_ptr, output_tensor) !=
          Error::Ok) {
        output_tensor->FillDataBuffer(mutable_data_ptr, false /* copy_data */);
      }
      output_index++;
    }
    output_tensor_structs.push_back(output_tensor->CloneTensorStruct());
  }

  ET_CHECK_OR_RETURN_ERROR(
      qnn_manager->Execute(
          method_name,
          input_tensor_structs,
          output_tensor_structs,
          context.event_tracer()) == Error::Ok,
      Internal,
      "Fail to execute graph");
  ET_CHECK_OR_RETURN_ERROR(
      qnn_manager->ProfileExecuteData(method_name, context.event_tracer()) ==
          Error::Ok,
      Internal,
      "Fail to profile graph");

  return Error::Ok;
}

void QnnExecuTorchBackend::destroy(DelegateHandle* handle) const {
  if (handle != nullptr && delegate_map_rev_.count(handle)) {
    QnnManager* qnn_manager = static_cast<QnnManager*>(handle);
    qnn_manager->Destroy();
    erase_cached_delegate(handle);
  }
}

bool QnnExecuTorchBackend::is_available() const {
  return true;
}

void QnnExecuTorchBackend::add_cached_delegate(
    const std::string& signature,
    executorch::runtime::DelegateHandle* handle) const {
  std::lock_guard<std::mutex> guard(mutex_);
  // Keep the forward and reverse maps in sync so a handle can be validated
  // in execute() and erased in destroy().
  delegate_map_[signature] = handle;
  delegate_map_rev_[handle] = signature;
}

void QnnExecuTorchBackend::erase_cached_delegate(
    executorch::runtime::DelegateHandle* handle) const {
  std::lock_guard<std::mutex> guard(mutex_);
  auto iter = delegate_map_rev_.find(handle);
  if (iter == delegate_map_rev_.end()) {
    return;
  }
  delegate_map_.erase(iter->second);
  delegate_map_rev_.erase(handle);
}

namespace {
auto cls = QnnExecuTorchBackend();
executorch::runtime::Backend backend{"QnnBackend", &cls};
static auto success_with_compiler = register_backend(backend);
} // namespace
} // namespace qnn
} // namespace backends
} // namespace executorch
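
// Usage sketch (illustration only, not part of this file's behavior): linking
// this translation unit runs the static registration above, so a .pte file
// lowered for "QnnBackend" resolves this delegate at Method load time.
// Assuming the ExecuTorch Module extension API and a hypothetical file name:
//
//   executorch::extension::Module module("model_qnn.pte");
//   auto outputs = module.forward(inputs);  // dispatches into execute() above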