xref: /aosp_15_r20/external/executorch/backends/mediatek/runtime/NeuronBackend.cpp (revision 523fa7a60841cd1ecfb9cc4201f1ca8b03ed023a)
1 /*
2  * Copyright (c) 2024 MediaTek Inc.
3  *
4  * Licensed under the BSD License (the "License"); you may not use this file
5  * except in compliance with the License. See the license file in the root
6  * directory of this source tree for more details.
7  */
8 
9 #include "NeuronBackend.h"
10 #include "NeuronBufferAllocator.h"
11 #include "NeuronLog.h"
12 #include "NeuronPayloadHeader.h"
13 #include "api/NeuronAdapter.h"
14 
15 #include "executorch/runtime/core/error.h"
16 
#include <algorithm>
#include <cstring>
#include <memory>
#include <new>
#include <unordered_set>
21 
22 namespace executorch {
23 namespace backends {
24 namespace neuron {
25 
26 using executorch::runtime::ArrayRef;
27 using executorch::runtime::BackendExecutionContext;
28 using executorch::runtime::BackendInitContext;
29 using executorch::runtime::CompileSpec;
30 using executorch::runtime::DelegateHandle;
31 using executorch::runtime::Error;
32 using executorch::runtime::EValue;
33 using executorch::runtime::FreeableBuffer;
34 using executorch::runtime::MemoryAllocator;
35 using executorch::runtime::Result;
36 
// Compile-spec keys recognized by NeuronBackend::init().
// `constexpr` makes the compile-time nature explicit; linkage is unchanged
// (namespace-scope const arrays already have internal linkage).
constexpr char kHighAddrKey[] = "HighAddr";
constexpr char kImportForeverKey[] = "ImportForever";
39 
init(BackendInitContext & context,FreeableBuffer * processed,ArrayRef<CompileSpec> compile_specs) const40 Result<DelegateHandle*> NeuronBackend::init(
41     BackendInitContext& context,
42     FreeableBuffer* processed,
43     ArrayRef<CompileSpec> compile_specs) const {
44   NeuronDelegateSetting setting;
45   for (auto& compile_spec : compile_specs) {
46     if (std::strcmp(compile_spec.key, kHighAddrKey) == 0) {
47       setting.mHighAddr = *static_cast<char*>(compile_spec.value.buffer);
48       LogInfo("NeuronBackend", "IsHighAddr Enable : %d", setting.mHighAddr);
49     } else if (std::strcmp(compile_spec.key, kImportForeverKey) == 0) {
50       setting.mImportForever = *static_cast<char*>(compile_spec.value.buffer);
51       LogInfo(
52           "NeuronBackend",
53           "IsImportForever Enable : %d",
54           setting.mImportForever);
55     } else {
56       LogWarn("NeuronBackend", "unknown compile spec: %s", compile_spec.key);
57     }
58   }
59   auto Payload = NeuronPayload(processed->data(), processed->size());
60   LogInfo(
61       "NeuronBackend",
62       "version %u, input %u, output %u, length %u, payload size: %zu",
63       Payload.Header.Version,
64       Payload.Header.InputCount,
65       Payload.Header.OutputCount,
66       Payload.Header.DataLen,
67       processed->size());
68 
69   MemoryAllocator* runtime_allocator = context.get_runtime_allocator();
70   NeuronExecuTorchDelegate* delegate = ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(
71       runtime_allocator, NeuronExecuTorchDelegate);
72   new (delegate) NeuronExecuTorchDelegate();
73 
74   if (delegate == nullptr) {
75     return nullptr;
76   }
77   auto res = delegate->LoadCompiledNetwork(Payload, setting);
78   return res == NEURON_NO_ERROR ? delegate : nullptr;
79 }
80 
execute(ET_UNUSED BackendExecutionContext & context,DelegateHandle * handle,EValue ** args) const81 Error NeuronBackend::execute(
82     ET_UNUSED BackendExecutionContext& context,
83     DelegateHandle* handle,
84     EValue** args) const {
85   NeuronExecuTorchDelegate* delegate =
86       reinterpret_cast<NeuronExecuTorchDelegate*>(handle);
87   return delegate->execute(context, args);
88 }
89 
destroy(DelegateHandle * handle) const90 void NeuronBackend::destroy(DelegateHandle* handle) const {
91   if (handle != nullptr) {
92     NeuronExecuTorchDelegate* delegate =
93         reinterpret_cast<NeuronExecuTorchDelegate*>(handle);
94     delegate->~NeuronExecuTorchDelegate();
95   }
96 }
97 
is_available() const98 bool NeuronBackend::is_available() const {
99   return true;
100 }
101 
execute(BackendExecutionContext & context,EValue ** args) const102 Error NeuronExecuTorchDelegate::execute(
103     BackendExecutionContext& context,
104     EValue** args) const {
105   if (HintNeuronBackend(args) != NEURON_NO_ERROR) {
106     return Error::InvalidState;
107   };
108 
109   auto allocator = dynamic_cast<torch::executor::neuron::BufferAllocator*>(
110       context.get_temp_allocator());
111   size_t inputCount = mInputSizes.size(), outputCount = mOutputSizes.size();
112 
113   for (int i = 0; i < inputCount; i++) {
114     auto data_ptr = args[i]->toTensor().data_ptr();
115     auto data_size = args[i]->toTensor().nbytes();
116     if (IsCached</*isInput=*/true>(i, data_ptr)) {
117       continue;
118     };
119     auto unit = allocator != nullptr ? allocator->Find(data_ptr) : nullptr;
120     if (unit) {
121       UpdateCache<true>(i, data_ptr);
122       size_t offset = (char*)data_ptr - (char*)unit->GetAddress();
123       mExecutor.SetInputOutputFromMemory</*isInput*/ true>(
124           i, unit->GetNeuronMemory(), offset, data_size);
125     } else {
126       mExecutor.SetInputOutput</*isInput=*/true>(i, data_ptr, data_size);
127     }
128   }
129 
130   for (int o = inputCount; o < inputCount + outputCount; o++) {
131     auto data_ptr = args[o]->toTensor().data_ptr();
132     auto data_size = args[o]->toTensor().nbytes();
133     auto output_index = o - inputCount;
134     if (IsCached</*isInput=*/false>(output_index, data_ptr)) {
135       continue;
136     };
137     auto unit = allocator != nullptr ? allocator->Find(data_ptr) : nullptr;
138     if (unit) {
139       UpdateCache</*isInput=*/false>(output_index, data_ptr);
140       size_t offset = (char*)data_ptr - (char*)unit->GetAddress();
141       mExecutor.SetInputOutputFromMemory</*isInput*/ false>(
142           output_index, unit->GetNeuronMemory(), offset, data_size);
143     } else {
144       mExecutor.SetInputOutput</*isInput=*/false>(
145           output_index, data_ptr, data_size);
146     }
147   }
148 
149   return mExecutor.Compute() == NEURON_NO_ERROR ? Error::Ok
150                                                 : Error::InvalidState;
151 };
152 
HintNeuronBackend(EValue ** args) const153 int NeuronExecuTorchDelegate::HintNeuronBackend(EValue** args) const {
154   auto HintImportForever = [this](EValue** args) -> int {
155     auto& allocator = GET_NEURON_ALLOCATOR;
156     size_t inputCount = mInputSizes.size(), outputCount = mOutputSizes.size();
157     for (int i = 0; i < inputCount; i++) {
158       auto data_ptr = args[i]->toTensor().data_ptr();
159       if (mHasImported.count(data_ptr)) {
160         continue;
161       }
162       auto unit = allocator.Find(data_ptr);
163       if (unit) {
164         mExecutor.SetInputOutputFromMemory</*isInput*/ true>(
165             i, unit->GetNeuronMemory(), 0, unit->GetSize());
166         mHasImported.insert(data_ptr);
167       }
168     }
169     for (int o = inputCount; o < inputCount + outputCount; o++) {
170       auto data_ptr = args[o]->toTensor().data_ptr();
171       if (mHasImported.count(data_ptr)) {
172         continue;
173       }
174       auto output_index = o - inputCount;
175       auto unit = allocator.Find(data_ptr);
176       if (unit) {
177         mExecutor.SetInputOutputFromMemory</*isInput*/ false>(
178             output_index, unit->GetNeuronMemory(), 0, unit->GetSize());
179         mHasImported.insert(data_ptr);
180       }
181     }
182     return NEURON_NO_ERROR;
183   };
184   if (mSettings.mImportForever) {
185     CHECK_NO_ERROR(HintImportForever(args));
186   }
187   return NEURON_NO_ERROR;
188 }
189 
190 } // namespace neuron
191 } // namespace backends
192 } // namespace executorch
193 
194 namespace {
195 auto cls = executorch::backends::neuron::NeuronBackend();
196 executorch::runtime::Backend backend{"NeuropilotBackend", &cls};
197 static auto success_with_compiler =
198     executorch::runtime::register_backend(backend);
199 } // namespace
200