xref: /aosp_15_r20/external/android-nn-driver/ArmnnPreparedModel.cpp (revision 3e777be0405cee09af5d5785ff37f7cfb5bee59a)
1*3e777be0SXin Li //
2*3e777be0SXin Li // Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
3*3e777be0SXin Li // SPDX-License-Identifier: MIT
4*3e777be0SXin Li //
5*3e777be0SXin Li 
6*3e777be0SXin Li #define LOG_TAG "ArmnnDriver"
7*3e777be0SXin Li 
8*3e777be0SXin Li #include "ArmnnPreparedModel.hpp"
9*3e777be0SXin Li #include "Utils.hpp"
10*3e777be0SXin Li 
11*3e777be0SXin Li #include <armnn/Types.hpp>
12*3e777be0SXin Li 
13*3e777be0SXin Li #include <log/log.h>
14*3e777be0SXin Li #include <OperationsUtils.h>
15*3e777be0SXin Li #include <ValidateHal.h>
16*3e777be0SXin Li 
17*3e777be0SXin Li #include <chrono>
18*3e777be0SXin Li #include <cinttypes>
19*3e777be0SXin Li 
20*3e777be0SXin Li #ifdef ARMNN_ANDROID_S
21*3e777be0SXin Li #include <LegacyUtils.h>
22*3e777be0SXin Li #endif
23*3e777be0SXin Li 
24*3e777be0SXin Li using namespace android;
25*3e777be0SXin Li 
26*3e777be0SXin Li namespace
27*3e777be0SXin Li {
28*3e777be0SXin Li using namespace armnn_driver;
29*3e777be0SXin Li 
NotifyCallbackAndCheck(const::android::sp<V1_0::IExecutionCallback> & callback,V1_0::ErrorStatus errorStatus,std::string callingFunction)30*3e777be0SXin Li void NotifyCallbackAndCheck(const ::android::sp<V1_0::IExecutionCallback>& callback, V1_0::ErrorStatus errorStatus,
31*3e777be0SXin Li                             std::string callingFunction)
32*3e777be0SXin Li {
33*3e777be0SXin Li     Return<void> returned = callback->notify(errorStatus);
34*3e777be0SXin Li     // This check is required, if the callback fails and it isn't checked it will bring down the service
35*3e777be0SXin Li     if (!returned.isOk())
36*3e777be0SXin Li     {
37*3e777be0SXin Li         ALOGE("ArmnnDriver::%s: hidl callback failed to return properly: %s",
38*3e777be0SXin Li             callingFunction.c_str(), returned.description().c_str());
39*3e777be0SXin Li     }
40*3e777be0SXin Li }
41*3e777be0SXin Li 
ValidateRequestArgument(const V1_0::RequestArgument & requestArg,const armnn::TensorInfo & tensorInfo)42*3e777be0SXin Li bool ValidateRequestArgument(const V1_0::RequestArgument& requestArg, const armnn::TensorInfo& tensorInfo)
43*3e777be0SXin Li {
44*3e777be0SXin Li     if (requestArg.dimensions.size() != 0)
45*3e777be0SXin Li     {
46*3e777be0SXin Li         if (requestArg.dimensions.size() != tensorInfo.GetNumDimensions())
47*3e777be0SXin Li         {
48*3e777be0SXin Li             ALOGE("Mismatched dimensions (request argument: %zu, expected: %u)",
49*3e777be0SXin Li                   requestArg.dimensions.size(), tensorInfo.GetNumDimensions());
50*3e777be0SXin Li             return false;
51*3e777be0SXin Li         }
52*3e777be0SXin Li 
53*3e777be0SXin Li         for (unsigned int d = 0; d < tensorInfo.GetNumDimensions(); ++d)
54*3e777be0SXin Li         {
55*3e777be0SXin Li             if (requestArg.dimensions[d] != 0 && requestArg.dimensions[d] != tensorInfo.GetShape()[d])
56*3e777be0SXin Li             {
57*3e777be0SXin Li                 ALOGE("Mismatched size for dimension %d (request argument: %u, expected %u)",
58*3e777be0SXin Li                     d, requestArg.dimensions[d], tensorInfo.GetShape()[d]);
59*3e777be0SXin Li                 return false;
60*3e777be0SXin Li             }
61*3e777be0SXin Li         }
62*3e777be0SXin Li     }
63*3e777be0SXin Li 
64*3e777be0SXin Li     return true;
65*3e777be0SXin Li }
66*3e777be0SXin Li 
GetTensorForRequestArgument(const V1_0::RequestArgument & requestArg,const armnn::TensorInfo & tensorInfo,const std::vector<::android::nn::RunTimePoolInfo> & requestPools)67*3e777be0SXin Li armnn::Tensor GetTensorForRequestArgument(const V1_0::RequestArgument& requestArg,
68*3e777be0SXin Li     const armnn::TensorInfo& tensorInfo,
69*3e777be0SXin Li     const std::vector<::android::nn::RunTimePoolInfo>& requestPools)
70*3e777be0SXin Li {
71*3e777be0SXin Li     if (!ValidateRequestArgument(requestArg, tensorInfo))
72*3e777be0SXin Li     {
73*3e777be0SXin Li         return armnn::Tensor();
74*3e777be0SXin Li     }
75*3e777be0SXin Li 
76*3e777be0SXin Li     return armnn::Tensor(tensorInfo, GetMemoryFromPool(requestArg.location, requestPools));
77*3e777be0SXin Li }
78*3e777be0SXin Li 
// Compose a tensor name for dump files, e.g. ("Input", 2) -> "Input2".
inline std::string BuildTensorName(const char* tensorNamePrefix, std::size_t index)
{
    std::string name(tensorNamePrefix);
    name += std::to_string(index);
    return name;
}
83*3e777be0SXin Li 
84*3e777be0SXin Li } // anonymous namespace
85*3e777be0SXin Li 
86*3e777be0SXin Li using namespace android::hardware;
87*3e777be0SXin Li 
88*3e777be0SXin Li namespace armnn_driver
89*3e777be0SXin Li {
90*3e777be0SXin Li template<typename HalVersion>
91*3e777be0SXin Li RequestThread<ArmnnPreparedModel, HalVersion, CallbackContext_1_0>
92*3e777be0SXin Li     ArmnnPreparedModel<HalVersion>::m_RequestThread;
93*3e777be0SXin Li 
94*3e777be0SXin Li template<typename HalVersion>
95*3e777be0SXin Li std::unique_ptr<armnn::Threadpool> ArmnnPreparedModel<HalVersion>::m_Threadpool(nullptr);
96*3e777be0SXin Li 
97*3e777be0SXin Li template<typename HalVersion>
98*3e777be0SXin Li template <typename TensorBindingCollection>
DumpTensorsIfRequired(char const * tensorNamePrefix,const TensorBindingCollection & tensorBindings)99*3e777be0SXin Li void ArmnnPreparedModel<HalVersion>::DumpTensorsIfRequired(char const* tensorNamePrefix,
100*3e777be0SXin Li                                                            const TensorBindingCollection& tensorBindings)
101*3e777be0SXin Li {
102*3e777be0SXin Li     if (!m_RequestInputsAndOutputsDumpDir.empty())
103*3e777be0SXin Li     {
104*3e777be0SXin Li         const std::string requestName = std::to_string(m_NetworkId) + "_" + std::to_string(m_RequestCount) + ".dump";
105*3e777be0SXin Li         for (std::size_t i = 0u; i < tensorBindings.size(); ++i)
106*3e777be0SXin Li         {
107*3e777be0SXin Li             DumpTensor(m_RequestInputsAndOutputsDumpDir,
108*3e777be0SXin Li                 requestName,
109*3e777be0SXin Li                 BuildTensorName(tensorNamePrefix, i),
110*3e777be0SXin Li                 tensorBindings[i].second);
111*3e777be0SXin Li         }
112*3e777be0SXin Li     }
113*3e777be0SXin Li }
114*3e777be0SXin Li 
115*3e777be0SXin Li template<typename HalVersion>
ArmnnPreparedModel(armnn::NetworkId networkId,armnn::IRuntime * runtime,const HalModel & model,const std::string & requestInputsAndOutputsDumpDir,const bool gpuProfilingEnabled,const bool asyncModelExecutionEnabled,const unsigned int numberOfThreads,const bool importEnabled,const bool exportEnabled)116*3e777be0SXin Li ArmnnPreparedModel<HalVersion>::ArmnnPreparedModel(armnn::NetworkId networkId,
117*3e777be0SXin Li                                                    armnn::IRuntime* runtime,
118*3e777be0SXin Li                                                    const HalModel& model,
119*3e777be0SXin Li                                                    const std::string& requestInputsAndOutputsDumpDir,
120*3e777be0SXin Li                                                    const bool gpuProfilingEnabled,
121*3e777be0SXin Li                                                    const bool asyncModelExecutionEnabled,
122*3e777be0SXin Li                                                    const unsigned int numberOfThreads,
123*3e777be0SXin Li                                                    const bool importEnabled,
124*3e777be0SXin Li                                                    const bool exportEnabled)
125*3e777be0SXin Li     : m_NetworkId(networkId)
126*3e777be0SXin Li     , m_Runtime(runtime)
127*3e777be0SXin Li     , m_Model(model)
128*3e777be0SXin Li     , m_RequestCount(0)
129*3e777be0SXin Li     , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
130*3e777be0SXin Li     , m_GpuProfilingEnabled(gpuProfilingEnabled)
131*3e777be0SXin Li     , m_AsyncModelExecutionEnabled(asyncModelExecutionEnabled)
132*3e777be0SXin Li     , m_EnableImport(importEnabled)
133*3e777be0SXin Li     , m_EnableExport(exportEnabled)
134*3e777be0SXin Li {
135*3e777be0SXin Li     // Enable profiling if required.
136*3e777be0SXin Li     m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
137*3e777be0SXin Li 
138*3e777be0SXin Li     if (m_AsyncModelExecutionEnabled)
139*3e777be0SXin Li     {
140*3e777be0SXin Li         std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
141*3e777be0SXin Li         for (unsigned int i=0; i < numberOfThreads; ++i)
142*3e777be0SXin Li         {
143*3e777be0SXin Li             memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId));
144*3e777be0SXin Li         }
145*3e777be0SXin Li 
146*3e777be0SXin Li         if (!m_Threadpool)
147*3e777be0SXin Li         {
148*3e777be0SXin Li             m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
149*3e777be0SXin Li         }
150*3e777be0SXin Li         else
151*3e777be0SXin Li         {
152*3e777be0SXin Li             m_Threadpool->LoadMemHandles(memHandles);
153*3e777be0SXin Li         }
154*3e777be0SXin Li 
155*3e777be0SXin Li         m_WorkingMemHandle = memHandles.back();
156*3e777be0SXin Li     }
157*3e777be0SXin Li }
158*3e777be0SXin Li 
159*3e777be0SXin Li template<typename HalVersion>
~ArmnnPreparedModel()160*3e777be0SXin Li ArmnnPreparedModel<HalVersion>::~ArmnnPreparedModel()
161*3e777be0SXin Li {
162*3e777be0SXin Li     // Get a hold of the profiler used by this model.
163*3e777be0SXin Li     std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);
164*3e777be0SXin Li     if (profiler && m_GpuProfilingEnabled)
165*3e777be0SXin Li     {
166*3e777be0SXin Li         // Dump the profiling info to a file if required.
167*3e777be0SXin Li         DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId,
168*3e777be0SXin Li                                     profiler.get());
169*3e777be0SXin Li     }
170*3e777be0SXin Li 
171*3e777be0SXin Li     // Unload the network associated with this model.
172*3e777be0SXin Li     m_Runtime->UnloadNetwork(m_NetworkId);
173*3e777be0SXin Li 
174*3e777be0SXin Li     // Unload the network memhandles from the threadpool
175*3e777be0SXin Li     if (m_AsyncModelExecutionEnabled)
176*3e777be0SXin Li     {
177*3e777be0SXin Li         m_Threadpool->UnloadMemHandles(m_NetworkId);
178*3e777be0SXin Li     }
179*3e777be0SXin Li }
180*3e777be0SXin Li 
181*3e777be0SXin Li template<typename HalVersion>
execute(const V1_0::Request & request,const::android::sp<V1_0::IExecutionCallback> & callback)182*3e777be0SXin Li Return<V1_0::ErrorStatus> ArmnnPreparedModel<HalVersion>::execute(
183*3e777be0SXin Li     const V1_0::Request& request,
184*3e777be0SXin Li     const ::android::sp<V1_0::IExecutionCallback>& callback)
185*3e777be0SXin Li {
186*3e777be0SXin Li     ALOGV("ArmnnPreparedModel::execute(): %s", GetModelSummary(m_Model).c_str());
187*3e777be0SXin Li     m_RequestCount++;
188*3e777be0SXin Li 
189*3e777be0SXin Li     if (callback.get() == nullptr) {
190*3e777be0SXin Li         ALOGE("ArmnnPreparedModel::execute invalid callback passed");
191*3e777be0SXin Li         return V1_0::ErrorStatus::INVALID_ARGUMENT;
192*3e777be0SXin Li     }
193*3e777be0SXin Li 
194*3e777be0SXin Li     if (!android::nn::validateRequest(request, m_Model))
195*3e777be0SXin Li     {
196*3e777be0SXin Li         NotifyCallbackAndCheck(callback, V1_0::ErrorStatus::INVALID_ARGUMENT, "ArmnnPreparedModel::execute");
197*3e777be0SXin Li         return V1_0::ErrorStatus::INVALID_ARGUMENT;
198*3e777be0SXin Li     }
199*3e777be0SXin Li 
200*3e777be0SXin Li     if (!m_RequestInputsAndOutputsDumpDir.empty())
201*3e777be0SXin Li     {
202*3e777be0SXin Li         ALOGD("Dumping inputs and outputs for request %" PRIuPTR, reinterpret_cast<std::uintptr_t>(callback.get()));
203*3e777be0SXin Li     }
204*3e777be0SXin Li 
205*3e777be0SXin Li     // allocate the tensors on the heap, as they are passed to the request thread
206*3e777be0SXin Li     auto pInputTensors = std::make_shared<armnn::InputTensors>();
207*3e777be0SXin Li     auto pOutputTensors = std::make_shared<armnn::OutputTensors>();
208*3e777be0SXin Li 
209*3e777be0SXin Li     // map the memory pool into shared pointers
210*3e777be0SXin Li     // use a shared memory pools vector on the heap, as it is passed to the request thread
211*3e777be0SXin Li     auto pMemPools = std::make_shared<std::vector<android::nn::RunTimePoolInfo>>();
212*3e777be0SXin Li #if !defined(ARMNN_ANDROID_S)
213*3e777be0SXin Li     if (!setRunTimePoolInfosFromHidlMemories(pMemPools.get(), request.pools))
214*3e777be0SXin Li #else
215*3e777be0SXin Li     if (!setRunTimePoolInfosFromCanonicalMemories(pMemPools.get(), uncheckedConvert(request.pools)))
216*3e777be0SXin Li #endif
217*3e777be0SXin Li     {
218*3e777be0SXin Li         NotifyCallbackAndCheck(callback, V1_0::ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::execute");
219*3e777be0SXin Li         return V1_0::ErrorStatus::GENERAL_FAILURE;
220*3e777be0SXin Li     }
221*3e777be0SXin Li 
222*3e777be0SXin Li     // add the inputs and outputs with their data
223*3e777be0SXin Li     try
224*3e777be0SXin Li     {
225*3e777be0SXin Li         pInputTensors->reserve(request.inputs.size());
226*3e777be0SXin Li         for (unsigned int i = 0; i < request.inputs.size(); i++)
227*3e777be0SXin Li         {
228*3e777be0SXin Li             const auto& inputArg = request.inputs[i];
229*3e777be0SXin Li             armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
230*3e777be0SXin Li             // pInputTensors (of type InputTensors) is composed of a vector of ConstTensors.
231*3e777be0SXin Li             // Therefore, set all TensorInfo isConstant parameters of input Tensors to true.
232*3e777be0SXin Li             inputTensorInfo.SetConstant();
233*3e777be0SXin Li             auto result = ValidateRequestArgument<V1_0::ErrorStatus, V1_0::Request>(request,
234*3e777be0SXin Li                                                                                     inputTensorInfo,
235*3e777be0SXin Li                                                                                     inputArg,
236*3e777be0SXin Li                                                                                     "input");
237*3e777be0SXin Li             if (result != V1_0::ErrorStatus::NONE)
238*3e777be0SXin Li             {
239*3e777be0SXin Li                 return result;
240*3e777be0SXin Li             }
241*3e777be0SXin Li 
242*3e777be0SXin Li             const armnn::Tensor inputTensor = GetTensorForRequestArgument(inputArg, inputTensorInfo, *pMemPools);
243*3e777be0SXin Li             if (inputTensor.GetMemoryArea() == nullptr)
244*3e777be0SXin Li             {
245*3e777be0SXin Li                 ALOGE("Cannot execute request. Error converting request input %u to tensor", i);
246*3e777be0SXin Li                 return V1_0::ErrorStatus::GENERAL_FAILURE;
247*3e777be0SXin Li             }
248*3e777be0SXin Li 
249*3e777be0SXin Li             pInputTensors->emplace_back(i, inputTensor);
250*3e777be0SXin Li         }
251*3e777be0SXin Li 
252*3e777be0SXin Li         pOutputTensors->reserve(request.outputs.size());
253*3e777be0SXin Li         for (unsigned int i = 0; i < request.outputs.size(); i++)
254*3e777be0SXin Li         {
255*3e777be0SXin Li             const auto& outputArg = request.outputs[i];
256*3e777be0SXin Li             const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
257*3e777be0SXin Li             auto result = ValidateRequestArgument<V1_0::ErrorStatus, V1_0::Request>(request,
258*3e777be0SXin Li                                                                                     outputTensorInfo,
259*3e777be0SXin Li                                                                                     outputArg,
260*3e777be0SXin Li                                                                                     "output");
261*3e777be0SXin Li 
262*3e777be0SXin Li             if (result != V1_0::ErrorStatus::NONE)
263*3e777be0SXin Li             {
264*3e777be0SXin Li                 return result;
265*3e777be0SXin Li             }
266*3e777be0SXin Li 
267*3e777be0SXin Li             const armnn::Tensor outputTensor = GetTensorForRequestArgument(outputArg, outputTensorInfo, *pMemPools);
268*3e777be0SXin Li             if (outputTensor.GetMemoryArea() == nullptr)
269*3e777be0SXin Li             {
270*3e777be0SXin Li                 ALOGE("Cannot execute request. Error converting request output %u to tensor", i);
271*3e777be0SXin Li                 return V1_0::ErrorStatus::GENERAL_FAILURE;
272*3e777be0SXin Li             }
273*3e777be0SXin Li 
274*3e777be0SXin Li             pOutputTensors->emplace_back(i, outputTensor);
275*3e777be0SXin Li         }
276*3e777be0SXin Li     }
277*3e777be0SXin Li     catch (armnn::Exception& e)
278*3e777be0SXin Li     {
279*3e777be0SXin Li         ALOGW("armnn::Exception caught while preparing for EnqueueWorkload: %s", e.what());
280*3e777be0SXin Li         NotifyCallbackAndCheck(callback, V1_0::ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::execute");
281*3e777be0SXin Li         return V1_0::ErrorStatus::GENERAL_FAILURE;
282*3e777be0SXin Li     }
283*3e777be0SXin Li     catch (std::exception& e)
284*3e777be0SXin Li     {
285*3e777be0SXin Li         ALOGE("std::exception caught while preparing for EnqueueWorkload: %s", e.what());
286*3e777be0SXin Li         NotifyCallbackAndCheck(callback, V1_0::ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::execute");
287*3e777be0SXin Li         return V1_0::ErrorStatus::GENERAL_FAILURE;
288*3e777be0SXin Li     }
289*3e777be0SXin Li 
290*3e777be0SXin Li     auto cb = [callback](V1_0::ErrorStatus errorStatus, std::string callingFunction)
291*3e777be0SXin Li     {
292*3e777be0SXin Li         NotifyCallbackAndCheck(callback, errorStatus, callingFunction);
293*3e777be0SXin Li     };
294*3e777be0SXin Li 
295*3e777be0SXin Li     CallbackContext_1_0 armnnCb;
296*3e777be0SXin Li     armnnCb.callback = cb;
297*3e777be0SXin Li 
298*3e777be0SXin Li     if (m_AsyncModelExecutionEnabled)
299*3e777be0SXin Li     {
300*3e777be0SXin Li         ALOGV("ArmnnPreparedModel::execute(...) before ScheduleGraphForExecution");
301*3e777be0SXin Li         ScheduleGraphForExecution(pMemPools, pInputTensors, pOutputTensors, armnnCb);
302*3e777be0SXin Li         ALOGV("ArmnnPreparedModel::execute(...) after ScheduleGraphForExecution");
303*3e777be0SXin Li         return V1_0::ErrorStatus::NONE;
304*3e777be0SXin Li     }
305*3e777be0SXin Li 
306*3e777be0SXin Li     // post the request for asynchronous execution
307*3e777be0SXin Li     ALOGV("ArmnnPreparedModel::execute(...) before PostMsg");
308*3e777be0SXin Li     m_RequestThread.PostMsg(this, pMemPools, pInputTensors, pOutputTensors, armnnCb);
309*3e777be0SXin Li     ALOGV("ArmnnPreparedModel::execute(...) after PostMsg");
310*3e777be0SXin Li     return V1_0::ErrorStatus::NONE; // successfully queued
311*3e777be0SXin Li }
312*3e777be0SXin Li 
313*3e777be0SXin Li template<typename HalVersion>
ExecuteGraph(std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>> & pMemPools,armnn::InputTensors & inputTensors,armnn::OutputTensors & outputTensors,CallbackContext_1_0 cb)314*3e777be0SXin Li void ArmnnPreparedModel<HalVersion>::ExecuteGraph(
315*3e777be0SXin Li         std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
316*3e777be0SXin Li         armnn::InputTensors& inputTensors,
317*3e777be0SXin Li         armnn::OutputTensors& outputTensors,
318*3e777be0SXin Li         CallbackContext_1_0 cb)
319*3e777be0SXin Li {
320*3e777be0SXin Li     ALOGV("ArmnnPreparedModel::ExecuteGraph(...)");
321*3e777be0SXin Li     // Capture the graph execution start time.
322*3e777be0SXin Li     std::chrono::time_point<std::chrono::system_clock> graphExecutionStart = std::chrono::system_clock::now();
323*3e777be0SXin Li 
324*3e777be0SXin Li     DumpTensorsIfRequired("Input", inputTensors);
325*3e777be0SXin Li 
326*3e777be0SXin Li     // run it
327*3e777be0SXin Li     try
328*3e777be0SXin Li     {
329*3e777be0SXin Li         armnn::Status status;
330*3e777be0SXin Li         if (m_AsyncModelExecutionEnabled)
331*3e777be0SXin Li         {
332*3e777be0SXin Li             ALOGW("ArmnnPreparedModel::ExecuteGraph m_AsyncModelExecutionEnabled true");
333*3e777be0SXin Li             status = m_Runtime->Execute(*m_WorkingMemHandle, inputTensors, outputTensors);
334*3e777be0SXin Li         }
335*3e777be0SXin Li         else
336*3e777be0SXin Li         {
337*3e777be0SXin Li             ALOGW("ArmnnPreparedModel::ExecuteGraph m_AsyncModelExecutionEnabled false");
338*3e777be0SXin Li             // Create a vector of Input and Output Ids which can be imported. An empty vector means all will be copied.
339*3e777be0SXin Li             std::vector<armnn::ImportedInputId> importedInputIds;
340*3e777be0SXin Li             if (m_EnableImport)
341*3e777be0SXin Li             {
342*3e777be0SXin Li                 importedInputIds =  m_Runtime->ImportInputs(m_NetworkId, inputTensors, armnn::MemorySource::Malloc);
343*3e777be0SXin Li             }
344*3e777be0SXin Li             std::vector<armnn::ImportedOutputId> importedOutputIds;
345*3e777be0SXin Li             if (m_EnableExport)
346*3e777be0SXin Li             {
347*3e777be0SXin Li                 importedOutputIds = m_Runtime->ImportOutputs(m_NetworkId, outputTensors, armnn::MemorySource::Malloc);
348*3e777be0SXin Li             }
349*3e777be0SXin Li             status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors,
350*3e777be0SXin Li                                                 importedInputIds, importedOutputIds);
351*3e777be0SXin Li         }
352*3e777be0SXin Li         if (status != armnn::Status::Success)
353*3e777be0SXin Li         {
354*3e777be0SXin Li             ALOGW("EnqueueWorkload failed");
355*3e777be0SXin Li             cb.callback(V1_0::ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::ExecuteGraph");
356*3e777be0SXin Li             return;
357*3e777be0SXin Li         }
358*3e777be0SXin Li     }
359*3e777be0SXin Li     catch (armnn::Exception& e)
360*3e777be0SXin Li     {
361*3e777be0SXin Li         ALOGW("armnn::Exception caught from EnqueueWorkload: %s", e.what());
362*3e777be0SXin Li         cb.callback(V1_0::ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::ExecuteGraph");
363*3e777be0SXin Li         return;
364*3e777be0SXin Li     }
365*3e777be0SXin Li     catch (std::exception& e)
366*3e777be0SXin Li     {
367*3e777be0SXin Li         ALOGE("std::exception caught from EnqueueWorkload: %s", e.what());
368*3e777be0SXin Li         cb.callback(V1_0::ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::ExecuteGraph");
369*3e777be0SXin Li         return;
370*3e777be0SXin Li     }
371*3e777be0SXin Li 
372*3e777be0SXin Li     DumpTensorsIfRequired("Output", outputTensors);
373*3e777be0SXin Li 
374*3e777be0SXin Li     // Commit output buffers.
375*3e777be0SXin Li     // Note that we update *all* pools, even if they aren't actually used as outputs -
376*3e777be0SXin Li     // this is simpler and is what the CpuExecutor does.
377*3e777be0SXin Li     for (android::nn::RunTimePoolInfo& pool : *pMemPools)
378*3e777be0SXin Li     {
379*3e777be0SXin Li         // Type android::nn::RunTimePoolInfo has changed between Android P & Q and Android R, where
380*3e777be0SXin Li         // update() has been removed and flush() added.
381*3e777be0SXin Li         #if defined(ARMNN_ANDROID_R) || defined(ARMNN_ANDROID_S) // Use the new Android implementation.
382*3e777be0SXin Li             pool.flush();
383*3e777be0SXin Li         #else
384*3e777be0SXin Li             pool.update();
385*3e777be0SXin Li         #endif
386*3e777be0SXin Li     }
387*3e777be0SXin Li 
388*3e777be0SXin Li     // Log the total time in this call. This is a good number to compare to that printed out by
389*3e777be0SXin Li     // RuntimeImpl::EnqueueWorkload. The difference should be the execution overhead of the driver.
390*3e777be0SXin Li     ALOGI("ArmnnPreparedModel::ExecuteGraph Execution time = %lld µs",
391*3e777be0SXin Li            std::chrono::duration_cast<std::chrono::microseconds>
392*3e777be0SXin Li           (std::chrono::system_clock::now() - graphExecutionStart).count());
393*3e777be0SXin Li 
394*3e777be0SXin Li     cb.callback(V1_0::ErrorStatus::NONE, "ExecuteGraph");
395*3e777be0SXin Li }
396*3e777be0SXin Li 
397*3e777be0SXin Li template<typename HalVersion>
ExecuteWithDummyInputs()398*3e777be0SXin Li bool ArmnnPreparedModel<HalVersion>::ExecuteWithDummyInputs()
399*3e777be0SXin Li {
400*3e777be0SXin Li     std::vector<std::vector<char>> storage;
401*3e777be0SXin Li     armnn::InputTensors inputTensors;
402*3e777be0SXin Li     for (unsigned int i = 0; i < getMainModel(m_Model).inputIndexes.size(); i++)
403*3e777be0SXin Li     {
404*3e777be0SXin Li         armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
405*3e777be0SXin Li         // pInputTensors (of type InputTensors) is composed of a vector of ConstTensors.
406*3e777be0SXin Li         // Therefore, set all TensorInfo isConstant parameters of input Tensors to true.
407*3e777be0SXin Li         inputTensorInfo.SetConstant();
408*3e777be0SXin Li 
409*3e777be0SXin Li         storage.emplace_back(inputTensorInfo.GetNumBytes());
410*3e777be0SXin Li         const armnn::ConstTensor inputTensor(inputTensorInfo, storage.back().data());
411*3e777be0SXin Li 
412*3e777be0SXin Li         inputTensors.emplace_back(i, inputTensor);
413*3e777be0SXin Li     }
414*3e777be0SXin Li 
415*3e777be0SXin Li     armnn::OutputTensors outputTensors;
416*3e777be0SXin Li     for (unsigned int i = 0; i < getMainModel(m_Model).outputIndexes.size(); i++)
417*3e777be0SXin Li     {
418*3e777be0SXin Li         const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
419*3e777be0SXin Li         storage.emplace_back(outputTensorInfo.GetNumBytes());
420*3e777be0SXin Li         const armnn::Tensor outputTensor(outputTensorInfo, storage.back().data());
421*3e777be0SXin Li 
422*3e777be0SXin Li         outputTensors.emplace_back(i, outputTensor);
423*3e777be0SXin Li     }
424*3e777be0SXin Li 
425*3e777be0SXin Li     try
426*3e777be0SXin Li     {
427*3e777be0SXin Li         armnn::Status status;
428*3e777be0SXin Li         if (m_AsyncModelExecutionEnabled)
429*3e777be0SXin Li         {
430*3e777be0SXin Li             ALOGW("ArmnnPreparedModel::ExecuteGraph m_AsyncModelExecutionEnabled true");
431*3e777be0SXin Li             status = m_Runtime->Execute(*m_WorkingMemHandle, inputTensors, outputTensors);
432*3e777be0SXin Li         }
433*3e777be0SXin Li         else
434*3e777be0SXin Li         {
435*3e777be0SXin Li             ALOGW("ArmnnPreparedModel::ExecuteGraph m_AsyncModelExecutionEnabled false");
436*3e777be0SXin Li             // Create a vector of Input and Output Ids which can be imported. An empty vector means all will be copied.
437*3e777be0SXin Li             std::vector<armnn::ImportedInputId> importedInputIds;
438*3e777be0SXin Li             if (m_EnableImport)
439*3e777be0SXin Li             {
440*3e777be0SXin Li                 importedInputIds =  m_Runtime->ImportInputs(m_NetworkId, inputTensors, armnn::MemorySource::Malloc);
441*3e777be0SXin Li             }
442*3e777be0SXin Li             std::vector<armnn::ImportedOutputId> importedOutputIds;
443*3e777be0SXin Li             if (m_EnableExport)
444*3e777be0SXin Li             {
445*3e777be0SXin Li                 importedOutputIds = m_Runtime->ImportOutputs(m_NetworkId, outputTensors, armnn::MemorySource::Malloc);
446*3e777be0SXin Li             }
447*3e777be0SXin Li             status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors,
448*3e777be0SXin Li                                                 importedInputIds, importedOutputIds);
449*3e777be0SXin Li         }
450*3e777be0SXin Li         if (status != armnn::Status::Success)
451*3e777be0SXin Li         {
452*3e777be0SXin Li             ALOGW("ExecuteWithDummyInputs: EnqueueWorkload failed");
453*3e777be0SXin Li             return false;
454*3e777be0SXin Li         }
455*3e777be0SXin Li     }
456*3e777be0SXin Li     catch (armnn::Exception& e)
457*3e777be0SXin Li     {
458*3e777be0SXin Li         ALOGW("ExecuteWithDummyInputs: armnn::Exception caught from EnqueueWorkload: %s", e.what());
459*3e777be0SXin Li         return false;
460*3e777be0SXin Li     }
461*3e777be0SXin Li     catch (std::exception& e)
462*3e777be0SXin Li     {
463*3e777be0SXin Li         ALOGE("ExecuteWithDummyInputs: std::exception caught from EnqueueWorkload: %s", e.what());
464*3e777be0SXin Li         return false;
465*3e777be0SXin Li     }
466*3e777be0SXin Li     return true;
467*3e777be0SXin Li }
468*3e777be0SXin Li 
469*3e777be0SXin Li /// Schedule the graph prepared from the request for execution
470*3e777be0SXin Li template<typename HalVersion>
471*3e777be0SXin Li template<typename CallbackContext>
ScheduleGraphForExecution(std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>> & pMemPools,std::shared_ptr<armnn::InputTensors> & inputTensors,std::shared_ptr<armnn::OutputTensors> & outputTensors,CallbackContext callbackContext)472*3e777be0SXin Li void ArmnnPreparedModel<HalVersion>::ScheduleGraphForExecution(
473*3e777be0SXin Li         std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
474*3e777be0SXin Li         std::shared_ptr<armnn::InputTensors>& inputTensors,
475*3e777be0SXin Li         std::shared_ptr<armnn::OutputTensors>& outputTensors,
476*3e777be0SXin Li         CallbackContext callbackContext)
477*3e777be0SXin Li {
478*3e777be0SXin Li     ALOGV("ArmnnPreparedModel::ScheduleGraphForExecution(...)");
479*3e777be0SXin Li 
480*3e777be0SXin Li     DumpTensorsIfRequired("Input", *inputTensors);
481*3e777be0SXin Li 
482*3e777be0SXin Li 
483*3e777be0SXin Li     auto tpCb = std::make_shared<
484*3e777be0SXin Li                 ArmnnThreadPoolCallback<CallbackContext_1_0>>(this,
485*3e777be0SXin Li                                                               pMemPools,
486*3e777be0SXin Li                                                               inputTensors,
487*3e777be0SXin Li                                                               outputTensors,
488*3e777be0SXin Li                                                               callbackContext);
489*3e777be0SXin Li 
490*3e777be0SXin Li     m_Threadpool->Schedule(m_NetworkId,
491*3e777be0SXin Li                            *tpCb->m_InputTensors,
492*3e777be0SXin Li                            *tpCb->m_OutputTensors,
493*3e777be0SXin Li                            armnn::QosExecPriority::Medium,
494*3e777be0SXin Li                            tpCb);
495*3e777be0SXin Li     ALOGV("ArmnnPreparedModel::ScheduleGraphForExecution end");
496*3e777be0SXin Li }
497*3e777be0SXin Li 
498*3e777be0SXin Li template<typename HalVersion>
499*3e777be0SXin Li template <typename CallbackContext>
Notify(armnn::Status status,armnn::InferenceTimingPair timeTaken)500*3e777be0SXin Li void ArmnnPreparedModel<HalVersion>::ArmnnThreadPoolCallback<CallbackContext>::Notify(
501*3e777be0SXin Li         armnn::Status status, armnn::InferenceTimingPair timeTaken)
502*3e777be0SXin Li {
503*3e777be0SXin Li     armnn::IgnoreUnused(status, timeTaken);
504*3e777be0SXin Li     ALOGV("ArmnnPreparedModel::ArmnnThreadPoolCallback_1_2 Notify");
505*3e777be0SXin Li 
506*3e777be0SXin Li     m_Model->DumpTensorsIfRequired("Output", *m_OutputTensors);
507*3e777be0SXin Li 
508*3e777be0SXin Li     // Commit output buffers.
509*3e777be0SXin Li     // Note that we update *all* pools, even if they aren't actually used as outputs -
510*3e777be0SXin Li     // this is simpler and is what the CpuExecutor does.
511*3e777be0SXin Li     for (android::nn::RunTimePoolInfo& pool : *m_MemPools)
512*3e777be0SXin Li     {
513*3e777be0SXin Li         // Type android::nn::RunTimePoolInfo has changed between Android P & Q and Android R, where
514*3e777be0SXin Li         // update() has been removed and flush() added.
515*3e777be0SXin Li         #if defined(ARMNN_ANDROID_R) || defined(ARMNN_ANDROID_S) // Use the new Android implementation.
516*3e777be0SXin Li             pool.flush();
517*3e777be0SXin Li         #else
518*3e777be0SXin Li             pool.update();
519*3e777be0SXin Li         #endif
520*3e777be0SXin Li     }
521*3e777be0SXin Li 
522*3e777be0SXin Li     m_CallbackContext.callback(V1_0::ErrorStatus::NONE, "ArmnnPreparedModel::ArmnnThreadPoolCallback_1_2 Notify");
523*3e777be0SXin Li     return;
524*3e777be0SXin Li }
525*3e777be0SXin Li 
526*3e777be0SXin Li ///
527*3e777be0SXin Li /// Class template specializations
528*3e777be0SXin Li ///
529*3e777be0SXin Li 
530*3e777be0SXin Li template class ArmnnPreparedModel<hal_1_0::HalPolicy>;
531*3e777be0SXin Li template void ArmnnPreparedModel<hal_1_0::HalPolicy>::ScheduleGraphForExecution<CallbackContext_1_0>(
532*3e777be0SXin Li         std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
533*3e777be0SXin Li         std::shared_ptr<armnn::InputTensors>& inputTensors,
534*3e777be0SXin Li         std::shared_ptr<armnn::OutputTensors>& outputTensors,
535*3e777be0SXin Li         CallbackContext_1_0 callbackContext);
536*3e777be0SXin Li 
537*3e777be0SXin Li #ifdef ARMNN_ANDROID_NN_V1_1
538*3e777be0SXin Li template class ArmnnPreparedModel<hal_1_1::HalPolicy>;
539*3e777be0SXin Li #endif
540*3e777be0SXin Li 
541*3e777be0SXin Li #ifdef ARMNN_ANDROID_NN_V1_2
542*3e777be0SXin Li template class ArmnnPreparedModel<hal_1_1::HalPolicy>;
543*3e777be0SXin Li template class ArmnnPreparedModel<hal_1_2::HalPolicy>;
544*3e777be0SXin Li #endif
545*3e777be0SXin Li 
546*3e777be0SXin Li #ifdef ARMNN_ANDROID_NN_V1_3
547*3e777be0SXin Li template class ArmnnPreparedModel<hal_1_1::HalPolicy>;
548*3e777be0SXin Li template class ArmnnPreparedModel<hal_1_2::HalPolicy>;
549*3e777be0SXin Li template class ArmnnPreparedModel<hal_1_3::HalPolicy>;
550*3e777be0SXin Li #endif
551*3e777be0SXin Li } // namespace armnn_driver
552