/*
 * Copyright (c) 2024 MediaTek Inc.
 *
 * Licensed under the BSD License (the "License"); you may not use this file
 * except in compliance with the License. See the license file in the root
 * directory of this source tree for more details.
 */

#include "NeuronBackend.h"
#include "NeuronBufferAllocator.h"
#include "NeuronLog.h"
#include "NeuronPayloadHeader.h"
#include "api/NeuronAdapter.h"

#include "executorch/runtime/core/error.h"

#include <algorithm>
#include <cstring>
#include <memory>
#include <new>
#include <unordered_set>
namespace executorch {
namespace backends {
namespace neuron {

using executorch::runtime::ArrayRef;
using executorch::runtime::BackendExecutionContext;
using executorch::runtime::BackendInitContext;
using executorch::runtime::CompileSpec;
using executorch::runtime::DelegateHandle;
using executorch::runtime::Error;
using executorch::runtime::EValue;
using executorch::runtime::FreeableBuffer;
using executorch::runtime::MemoryAllocator;
using executorch::runtime::Result;

const char kHighAddrKey[] = "HighAddr";
const char kImportForeverKey[] = "ImportForever";
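
// Parses the compile specs, logs the compiled payload header, and constructs
// a NeuronExecuTorchDelegate on the runtime allocator. Returns nullptr if
// the compiled network fails to load.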
Result<DelegateHandle*> NeuronBackend::init(
    BackendInitContext& context,
    FreeableBuffer* processed,
    ArrayRef<CompileSpec> compile_specs) const {
  NeuronDelegateSetting setting;
  for (auto& compile_spec : compile_specs) {
    if (std::strcmp(compile_spec.key, kHighAddrKey) == 0) {
      setting.mHighAddr = *static_cast<char*>(compile_spec.value.buffer);
      LogInfo("NeuronBackend", "HighAddr enabled: %d", setting.mHighAddr);
    } else if (std::strcmp(compile_spec.key, kImportForeverKey) == 0) {
      setting.mImportForever = *static_cast<char*>(compile_spec.value.buffer);
      LogInfo(
          "NeuronBackend",
          "ImportForever enabled: %d",
          setting.mImportForever);
    } else {
      LogWarn("NeuronBackend", "Unknown compile spec: %s", compile_spec.key);
    }
  }
  auto Payload = NeuronPayload(processed->data(), processed->size());
  LogInfo(
      "NeuronBackend",
      "version %u, input %u, output %u, length %u, payload size: %zu",
      Payload.Header.Version,
      Payload.Header.InputCount,
      Payload.Header.OutputCount,
      Payload.Header.DataLen,
      processed->size());

  MemoryAllocator* runtime_allocator = context.get_runtime_allocator();
  NeuronExecuTorchDelegate* delegate = ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(
      runtime_allocator, NeuronExecuTorchDelegate);
  if (delegate == nullptr) {
    return nullptr;
  }
  // Construct the delegate in the arena-provided storage; destroy() runs the
  // destructor explicitly since the allocator does not.
  new (delegate) NeuronExecuTorchDelegate();

  auto res = delegate->LoadCompiledNetwork(Payload, setting);
  return res == NEURON_NO_ERROR ? delegate : nullptr;
}
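
// Forwards execution to the delegate created in init().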
Error NeuronBackend::execute(
    BackendExecutionContext& context,
    DelegateHandle* handle,
    EValue** args) const {
  NeuronExecuTorchDelegate* delegate =
      reinterpret_cast<NeuronExecuTorchDelegate*>(handle);
  return delegate->execute(context, args);
}
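
// Runs the delegate destructor in place; the underlying memory is owned by
// the runtime allocator and is not freed here.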
void NeuronBackend::destroy(DelegateHandle* handle) const {
  if (handle != nullptr) {
    NeuronExecuTorchDelegate* delegate =
        reinterpret_cast<NeuronExecuTorchDelegate*>(handle);
    delegate->~NeuronExecuTorchDelegate();
  }
}
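
// The backend is compiled into this binary, so it is always available.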
bool NeuronBackend::is_available() const {
  return true;
}
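
// Binds input and output tensors to the Neuron executor, importing them by
// memory handle when they come from the Neuron buffer allocator, then runs
// the compiled network.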
Error NeuronExecuTorchDelegate::execute(
    BackendExecutionContext& context,
    EValue** args) const {
  if (HintNeuronBackend(args) != NEURON_NO_ERROR) {
    return Error::InvalidState;
  }

  auto allocator = dynamic_cast<torch::executor::neuron::BufferAllocator*>(
      context.get_temp_allocator());
  size_t inputCount = mInputSizes.size(), outputCount = mOutputSizes.size();

  for (size_t i = 0; i < inputCount; i++) {
    auto data_ptr = args[i]->toTensor().data_ptr();
    auto data_size = args[i]->toTensor().nbytes();
    if (IsCached</*isInput=*/true>(i, data_ptr)) {
      continue;
    }
    auto unit = allocator != nullptr ? allocator->Find(data_ptr) : nullptr;
    if (unit) {
      // The tensor lives in a Neuron-shared buffer; bind it by memory handle
      // at its offset within the buffer instead of copying.
      UpdateCache</*isInput=*/true>(i, data_ptr);
      size_t offset = (char*)data_ptr - (char*)unit->GetAddress();
      mExecutor.SetInputOutputFromMemory</*isInput=*/true>(
          i, unit->GetNeuronMemory(), offset, data_size);
    } else {
      mExecutor.SetInputOutput</*isInput=*/true>(i, data_ptr, data_size);
    }
  }

  for (size_t o = inputCount; o < inputCount + outputCount; o++) {
    auto data_ptr = args[o]->toTensor().data_ptr();
    auto data_size = args[o]->toTensor().nbytes();
    auto output_index = o - inputCount;
    if (IsCached</*isInput=*/false>(output_index, data_ptr)) {
      continue;
    }
    auto unit = allocator != nullptr ? allocator->Find(data_ptr) : nullptr;
    if (unit) {
      UpdateCache</*isInput=*/false>(output_index, data_ptr);
      size_t offset = (char*)data_ptr - (char*)unit->GetAddress();
      mExecutor.SetInputOutputFromMemory</*isInput=*/false>(
          output_index, unit->GetNeuronMemory(), offset, data_size);
    } else {
      mExecutor.SetInputOutput</*isInput=*/false>(
          output_index, data_ptr, data_size);
    }
  }

  return mExecutor.Compute() == NEURON_NO_ERROR ? Error::Ok
                                                : Error::InvalidState;
}
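
// When the ImportForever compile spec is set, imports each Neuron-shared
// buffer into the executor once so later executions can skip the per-run
// binding.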
int NeuronExecuTorchDelegate::HintNeuronBackend(EValue** args) const {
  auto HintImportForever = [this](EValue** args) -> int {
    auto& allocator = GET_NEURON_ALLOCATOR;
    size_t inputCount = mInputSizes.size(), outputCount = mOutputSizes.size();
    for (size_t i = 0; i < inputCount; i++) {
      auto data_ptr = args[i]->toTensor().data_ptr();
      if (mHasImported.count(data_ptr)) {
        continue;
      }
      auto unit = allocator.Find(data_ptr);
      if (unit) {
        // Import the whole buffer once; later executions reuse the imported
        // memory without re-binding.
        mExecutor.SetInputOutputFromMemory</*isInput=*/true>(
            i, unit->GetNeuronMemory(), 0, unit->GetSize());
        mHasImported.insert(data_ptr);
      }
    }
    for (size_t o = inputCount; o < inputCount + outputCount; o++) {
      auto data_ptr = args[o]->toTensor().data_ptr();
      if (mHasImported.count(data_ptr)) {
        continue;
      }
      auto output_index = o - inputCount;
      auto unit = allocator.Find(data_ptr);
      if (unit) {
        mExecutor.SetInputOutputFromMemory</*isInput=*/false>(
            output_index, unit->GetNeuronMemory(), 0, unit->GetSize());
        mHasImported.insert(data_ptr);
      }
    }
    return NEURON_NO_ERROR;
  };
  if (mSettings.mImportForever) {
    CHECK_NO_ERROR(HintImportForever(args));
  }
  return NEURON_NO_ERROR;
}

} // namespace neuron
} // namespace backends
} // namespace executorch

namespace {
// Register the backend at load time under the name "NeuropilotBackend".
auto cls = executorch::backends::neuron::NeuronBackend();
executorch::runtime::Backend backend{"NeuropilotBackend", &cls};
static auto success_with_compiler =
    executorch::runtime::register_backend(backend);
} // namespace