1*3e777be0SXin Li //
2*3e777be0SXin Li // Copyright © 2020, 2023 Arm Ltd. All rights reserved.
3*3e777be0SXin Li // SPDX-License-Identifier: MIT
4*3e777be0SXin Li //
5*3e777be0SXin Li
6*3e777be0SXin Li #include "ArmnnDriverImpl.hpp"
7*3e777be0SXin Li #include "../ArmnnPreparedModel_1_3.hpp"
8*3e777be0SXin Li #include "../ModelToINetworkConverter.hpp"
9*3e777be0SXin Li #include "../SystemPropertiesUtils.hpp"
10*3e777be0SXin Li
11*3e777be0SXin Li #include <armnnDeserializer/IDeserializer.hpp>
12*3e777be0SXin Li
13*3e777be0SXin Li #include <log/log.h>
14*3e777be0SXin Li
15*3e777be0SXin Li #include <sys/stat.h>
16*3e777be0SXin Li #include <chrono>
17*3e777be0SXin Li
18*3e777be0SXin Li namespace
19*3e777be0SXin Li {
// Names of the per-category performance settings (execution time and power usage).
// Presumably these are system-property keys resolved through SystemPropertiesUtils when the
// driver capabilities are built - confirm against the code that reads these constants.
//
// NOTE(review): the control-flow / relaxed-precision keys use the prefix "ArmNN." while the
// operand-type keys use "Armnn.". The spelling is part of the runtime key, so it is left untouched.

// Relaxed float32 -> float16 computation.
const char* g_RelaxedFloat32toFloat16PerformanceExecTime =
    "ArmNN.relaxedFloat32toFloat16Performance.execTime";
const char* g_RelaxedFloat32toFloat16PerformancePowerUsage =
    "ArmNN.relaxedFloat32toFloat16Performance.powerUsage";

// Control flow: IF.
const char* g_ifPerformanceExecTime =
    "ArmNN.ifPerformance.execTime";
const char* g_ifPerformancePowerUsage =
    "ArmNN.ifPerformance.powerUsage";

// Control flow: WHILE.
const char* g_whilePerformanceExecTime =
    "ArmNN.whilePerformance.execTime";
const char* g_whilePerformancePowerUsage =
    "ArmNN.whilePerformance.powerUsage";

// Operand type: TENSOR_FLOAT32.
const char* g_OperandTypeTensorFloat32PerformanceExecTime =
    "Armnn.operandTypeTensorFloat32Performance.execTime";
const char* g_OperandTypeTensorFloat32PerformancePowerUsage =
    "Armnn.operandTypeTensorFloat32Performance.powerUsage";

// Operand type: FLOAT32.
const char* g_OperandTypeFloat32PerformanceExecTime =
    "Armnn.operandTypeFloat32Performance.execTime";
const char* g_OperandTypeFloat32PerformancePowerUsage =
    "Armnn.operandTypeFloat32Performance.powerUsage";

// Operand type: TENSOR_FLOAT16.
const char* g_OperandTypeTensorFloat16PerformanceExecTime =
    "Armnn.operandTypeTensorFloat16Performance.execTime";
const char* g_OperandTypeTensorFloat16PerformancePowerUsage =
    "Armnn.operandTypeTensorFloat16Performance.powerUsage";

// Operand type: FLOAT16.
const char* g_OperandTypeFloat16PerformanceExecTime =
    "Armnn.operandTypeFloat16Performance.execTime";
const char* g_OperandTypeFloat16PerformancePowerUsage =
    "Armnn.operandTypeFloat16Performance.powerUsage";

// Operand type: TENSOR_QUANT8_ASYMM.
const char* g_OperandTypeTensorQuant8AsymmPerformanceExecTime =
    "Armnn.operandTypeTensorQuant8AsymmPerformance.execTime";
const char* g_OperandTypeTensorQuant8AsymmPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant8AsymmPerformance.powerUsage";

// Operand type: TENSOR_QUANT8_ASYMM_SIGNED.
const char* g_OperandTypeTensorQuant8AsymmSignedPerformanceExecTime =
    "Armnn.operandTypeTensorQuant8AsymmSignedPerformance.execTime";
const char* g_OperandTypeTensorQuant8AsymmSignedPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant8AsymmSignedPerformance.powerUsage";

// Operand type: TENSOR_QUANT16_SYMM.
const char* g_OperandTypeTensorQuant16SymmPerformanceExecTime =
    "Armnn.operandTypeTensorQuant16SymmPerformance.execTime";
const char* g_OperandTypeTensorQuant16SymmPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant16SymmPerformance.powerUsage";

// Operand type: TENSOR_QUANT8_SYMM.
const char* g_OperandTypeTensorQuant8SymmPerformanceExecTime =
    "Armnn.operandTypeTensorQuant8SymmPerformance.execTime";
const char* g_OperandTypeTensorQuant8SymmPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant8SymmPerformance.powerUsage";

// Operand type: TENSOR_QUANT8_SYMM_PER_CHANNEL.
const char* g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime =
    "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.execTime";
const char* g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.powerUsage";

// Operand type: TENSOR_INT32.
const char* g_OperandTypeTensorInt32PerformanceExecTime =
    "Armnn.operandTypeTensorInt32Performance.execTime";
const char* g_OperandTypeTensorInt32PerformancePowerUsage =
    "Armnn.operandTypeTensorInt32Performance.powerUsage";

// Operand type: INT32.
const char* g_OperandTypeInt32PerformanceExecTime =
    "Armnn.operandTypeInt32Performance.execTime";
const char* g_OperandTypeInt32PerformancePowerUsage =
    "Armnn.operandTypeInt32Performance.powerUsage";

72*3e777be0SXin Li
73*3e777be0SXin Li
NotifyCallbackAndCheck(const android::sp<V1_3::IPreparedModelCallback> & callback,V1_3::ErrorStatus errorStatus,const android::sp<V1_3::IPreparedModel> & preparedModelPtr)74*3e777be0SXin Li void NotifyCallbackAndCheck(const android::sp<V1_3::IPreparedModelCallback>& callback,
75*3e777be0SXin Li V1_3::ErrorStatus errorStatus,
76*3e777be0SXin Li const android::sp<V1_3::IPreparedModel>& preparedModelPtr)
77*3e777be0SXin Li {
78*3e777be0SXin Li Return<void> returned = callback->notify_1_3(errorStatus, preparedModelPtr);
79*3e777be0SXin Li // This check is required, if the callback fails and it isn't checked it will bring down the service
80*3e777be0SXin Li if (!returned.isOk())
81*3e777be0SXin Li {
82*3e777be0SXin Li ALOGE("ArmnnDriverImpl::prepareModel: hidl callback failed to return properly: %s ",
83*3e777be0SXin Li returned.description().c_str());
84*3e777be0SXin Li }
85*3e777be0SXin Li }
86*3e777be0SXin Li
FailPrepareModel(V1_3::ErrorStatus error,const std::string & message,const android::sp<V1_3::IPreparedModelCallback> & callback)87*3e777be0SXin Li Return<V1_3::ErrorStatus> FailPrepareModel(V1_3::ErrorStatus error,
88*3e777be0SXin Li const std::string& message,
89*3e777be0SXin Li const android::sp<V1_3::IPreparedModelCallback>& callback)
90*3e777be0SXin Li {
91*3e777be0SXin Li ALOGW("ArmnnDriverImpl::prepareModel: %s", message.c_str());
92*3e777be0SXin Li NotifyCallbackAndCheck(callback, error, nullptr);
93*3e777be0SXin Li return error;
94*3e777be0SXin Li }
95*3e777be0SXin Li
96*3e777be0SXin Li } // anonymous namespace
97*3e777be0SXin Li
98*3e777be0SXin Li namespace armnn_driver
99*3e777be0SXin Li {
100*3e777be0SXin Li namespace hal_1_3
101*3e777be0SXin Li {
102*3e777be0SXin Li
prepareArmnnModel_1_3(const armnn::IRuntimePtr & runtime,const armnn::IGpuAccTunedParametersPtr & clTunedParameters,const DriverOptions & options,const V1_3::Model & model,const android::hardware::hidl_vec<android::hardware::hidl_handle> & modelCacheHandle,const android::hardware::hidl_vec<android::hardware::hidl_handle> & dataCacheHandle,const HidlToken & token,const android::sp<V1_3::IPreparedModelCallback> & cb,bool float32ToFloat16,V1_3::Priority priority)103*3e777be0SXin Li Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
104*3e777be0SXin Li const armnn::IRuntimePtr& runtime,
105*3e777be0SXin Li const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
106*3e777be0SXin Li const DriverOptions& options,
107*3e777be0SXin Li const V1_3::Model& model,
108*3e777be0SXin Li const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
109*3e777be0SXin Li const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
110*3e777be0SXin Li const HidlToken& token,
111*3e777be0SXin Li const android::sp<V1_3::IPreparedModelCallback>& cb,
112*3e777be0SXin Li bool float32ToFloat16,
113*3e777be0SXin Li V1_3::Priority priority)
114*3e777be0SXin Li {
115*3e777be0SXin Li ALOGV("ArmnnDriverImpl::prepareArmnnModel_1_3()");
116*3e777be0SXin Li
117*3e777be0SXin Li std::chrono::time_point<std::chrono::system_clock> prepareModelTimepoint = std::chrono::system_clock::now();
118*3e777be0SXin Li
119*3e777be0SXin Li if (cb.get() == nullptr)
120*3e777be0SXin Li {
121*3e777be0SXin Li ALOGW("ArmnnDriverImpl::prepareModel: Invalid callback passed to prepareModel");
122*3e777be0SXin Li return V1_3::ErrorStatus::INVALID_ARGUMENT;
123*3e777be0SXin Li }
124*3e777be0SXin Li
125*3e777be0SXin Li if (!runtime)
126*3e777be0SXin Li {
127*3e777be0SXin Li return FailPrepareModel(V1_3::ErrorStatus::DEVICE_UNAVAILABLE, "Device unavailable", cb);
128*3e777be0SXin Li }
129*3e777be0SXin Li
130*3e777be0SXin Li if (!android::nn::validateModel(model))
131*3e777be0SXin Li {
132*3e777be0SXin Li return FailPrepareModel(V1_3::ErrorStatus::INVALID_ARGUMENT, "Invalid model passed as input", cb);
133*3e777be0SXin Li }
134*3e777be0SXin Li
135*3e777be0SXin Li // Deliberately ignore any unsupported operations requested by the options -
136*3e777be0SXin Li // at this point we're being asked to prepare a model that we've already declared support for
137*3e777be0SXin Li // and the operation indices may be different to those in getSupportedOperations anyway.
138*3e777be0SXin Li std::set<unsigned int> unsupportedOperations;
139*3e777be0SXin Li ModelToINetworkConverter<HalPolicy> modelConverter(options.GetBackends(),
140*3e777be0SXin Li model,
141*3e777be0SXin Li unsupportedOperations);
142*3e777be0SXin Li
143*3e777be0SXin Li if (modelConverter.GetConversionResult() != ConversionResult::Success)
144*3e777be0SXin Li {
145*3e777be0SXin Li FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "ModelToINetworkConverter failed", cb);
146*3e777be0SXin Li return V1_3::ErrorStatus::NONE;
147*3e777be0SXin Li }
148*3e777be0SXin Li
149*3e777be0SXin Li // Serialize the network graph to a .armnn file if an output directory
150*3e777be0SXin Li // has been specified in the drivers' arguments.
151*3e777be0SXin Li std::vector<uint8_t> dataCacheData;
152*3e777be0SXin Li bool serializeToFile = dataCacheHandle.size() < 1 ? false : true;
153*3e777be0SXin Li auto serializedNetworkFileName =
154*3e777be0SXin Li SerializeNetwork(*modelConverter.GetINetwork(),
155*3e777be0SXin Li options.GetRequestInputsAndOutputsDumpDir(),
156*3e777be0SXin Li dataCacheData,
157*3e777be0SXin Li serializeToFile);
158*3e777be0SXin Li
159*3e777be0SXin Li // Optimize the network
160*3e777be0SXin Li armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
161*3e777be0SXin Li armnn::OptimizerOptionsOpaque OptOptions;
162*3e777be0SXin Li OptOptions.SetReduceFp32ToFp16(float32ToFloat16);
163*3e777be0SXin Li OptOptions.SetProfilingEnabled(options.IsGpuProfilingEnabled());
164*3e777be0SXin Li
165*3e777be0SXin Li int cachedFd = -1;
166*3e777be0SXin Li bool saveCachedNetwork = options.SaveCachedNetwork();
167*3e777be0SXin Li
168*3e777be0SXin Li unsigned int numberOfCachedModelFiles = 0;
169*3e777be0SXin Li if (modelCacheHandle.size() > 0)
170*3e777be0SXin Li {
171*3e777be0SXin Li unsigned int index = 0;
172*3e777be0SXin Li for (auto& backend : options.GetBackends())
173*3e777be0SXin Li {
174*3e777be0SXin Li // modelCacheHandle size should be equal to numberOfCachedModelFiles
175*3e777be0SXin Li // modelCacheHandle vector should be in same order as backends
176*3e777be0SXin Li auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
177*3e777be0SXin Li if (numberOfCacheFiles > 0)
178*3e777be0SXin Li {
179*3e777be0SXin Li numberOfCachedModelFiles += numberOfCacheFiles;
180*3e777be0SXin Li if (modelCacheHandle[index]->numFds == 1)
181*3e777be0SXin Li {
182*3e777be0SXin Li // For GpuAcc numberOfCachedFiles is 1
183*3e777be0SXin Li if (backend == armnn::Compute::GpuAcc)
184*3e777be0SXin Li {
185*3e777be0SXin Li cachedFd = modelCacheHandle[index]->data[0];
186*3e777be0SXin Li saveCachedNetwork = true;
187*3e777be0SXin Li }
188*3e777be0SXin Li }
189*3e777be0SXin Li index += numberOfCachedModelFiles;
190*3e777be0SXin Li }
191*3e777be0SXin Li }
192*3e777be0SXin Li }
193*3e777be0SXin Li
194*3e777be0SXin Li armnn::BackendOptions gpuAcc("GpuAcc",
195*3e777be0SXin Li {
196*3e777be0SXin Li { "FastMathEnabled", options.IsFastMathEnabled() },
197*3e777be0SXin Li { "SaveCachedNetwork", saveCachedNetwork },
198*3e777be0SXin Li { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
199*3e777be0SXin Li { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
200*3e777be0SXin Li { "CachedFileDescriptor", cachedFd }
201*3e777be0SXin Li });
202*3e777be0SXin Li
203*3e777be0SXin Li armnn::BackendOptions cpuAcc("CpuAcc",
204*3e777be0SXin Li {
205*3e777be0SXin Li { "FastMathEnabled", options.IsFastMathEnabled() },
206*3e777be0SXin Li { "NumberOfThreads", options.GetNumberOfThreads() }
207*3e777be0SXin Li });
208*3e777be0SXin Li OptOptions.AddModelOption(gpuAcc);
209*3e777be0SXin Li OptOptions.AddModelOption(cpuAcc);
210*3e777be0SXin Li
211*3e777be0SXin Li std::vector<std::string> errMessages;
212*3e777be0SXin Li try
213*3e777be0SXin Li {
214*3e777be0SXin Li optNet = armnn::Optimize(*modelConverter.GetINetwork(),
215*3e777be0SXin Li options.GetBackends(),
216*3e777be0SXin Li runtime->GetDeviceSpec(),
217*3e777be0SXin Li OptOptions,
218*3e777be0SXin Li errMessages);
219*3e777be0SXin Li }
220*3e777be0SXin Li catch (std::exception& e)
221*3e777be0SXin Li {
222*3e777be0SXin Li std::stringstream message;
223*3e777be0SXin Li message << "Exception (" << e.what() << ") caught from optimize.";
224*3e777be0SXin Li FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
225*3e777be0SXin Li return V1_3::ErrorStatus::NONE;
226*3e777be0SXin Li }
227*3e777be0SXin Li
228*3e777be0SXin Li // Check that the optimized network is valid.
229*3e777be0SXin Li if (!optNet)
230*3e777be0SXin Li {
231*3e777be0SXin Li std::stringstream message;
232*3e777be0SXin Li message << "Invalid optimized network";
233*3e777be0SXin Li for (const std::string& msg : errMessages)
234*3e777be0SXin Li {
235*3e777be0SXin Li message << "\n" << msg;
236*3e777be0SXin Li }
237*3e777be0SXin Li FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
238*3e777be0SXin Li return V1_3::ErrorStatus::NONE;
239*3e777be0SXin Li }
240*3e777be0SXin Li
241*3e777be0SXin Li // Export the optimized network graph to a dot file if an output dump directory
242*3e777be0SXin Li // has been specified in the drivers' arguments.
243*3e777be0SXin Li std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
244*3e777be0SXin Li options.GetRequestInputsAndOutputsDumpDir());
245*3e777be0SXin Li
246*3e777be0SXin Li // Load it into the runtime.
247*3e777be0SXin Li armnn::NetworkId netId = 0;
248*3e777be0SXin Li std::string msg;
249*3e777be0SXin Li armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
250*3e777be0SXin Li MemorySource::Undefined,
251*3e777be0SXin Li MemorySource::Undefined,
252*3e777be0SXin Li options.IsGpuProfilingEnabled());
253*3e777be0SXin Li
254*3e777be0SXin Li auto numInputs = getMainModel(model).inputIndexes.size();
255*3e777be0SXin Li auto numOutputs = getMainModel(model).outputIndexes.size();
256*3e777be0SXin Li try
257*3e777be0SXin Li {
258*3e777be0SXin Li if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
259*3e777be0SXin Li {
260*3e777be0SXin Li return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be loaded", cb);
261*3e777be0SXin Li }
262*3e777be0SXin Li }
263*3e777be0SXin Li catch (std::exception& e)
264*3e777be0SXin Li {
265*3e777be0SXin Li std::stringstream message;
266*3e777be0SXin Li message << "Exception (" << e.what()<< ") caught from LoadNetwork.";
267*3e777be0SXin Li FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
268*3e777be0SXin Li return V1_3::ErrorStatus::NONE;
269*3e777be0SXin Li }
270*3e777be0SXin Li
271*3e777be0SXin Li // Now that we have a networkId for the graph rename the exported files to use it
272*3e777be0SXin Li // so that we can associate the graph file and the input/output tensor exported files
273*3e777be0SXin Li RenameExportedFiles(serializedNetworkFileName,
274*3e777be0SXin Li dotGraphFileName,
275*3e777be0SXin Li options.GetRequestInputsAndOutputsDumpDir(),
276*3e777be0SXin Li netId);
277*3e777be0SXin Li
278*3e777be0SXin Li std::unique_ptr<ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>> preparedModel(
279*3e777be0SXin Li new ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>(
280*3e777be0SXin Li netId,
281*3e777be0SXin Li runtime.get(),
282*3e777be0SXin Li model,
283*3e777be0SXin Li options.GetRequestInputsAndOutputsDumpDir(),
284*3e777be0SXin Li options.IsGpuProfilingEnabled(),
285*3e777be0SXin Li priority,
286*3e777be0SXin Li options.isAsyncModelExecutionEnabled(),
287*3e777be0SXin Li options.getNoOfArmnnThreads(),
288*3e777be0SXin Li options.isImportEnabled(),
289*3e777be0SXin Li options.isExportEnabled()));
290*3e777be0SXin Li
291*3e777be0SXin Li // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
292*3e777be0SXin Li // this is enabled) before the first 'real' inference which removes the overhead of the first inference.
293*3e777be0SXin Li // Only run this if the GpuAcc backend has been added to options
294*3e777be0SXin Li if (std::find(options.GetBackends().begin(),
295*3e777be0SXin Li options.GetBackends().end(),
296*3e777be0SXin Li armnn::Compute::GpuAcc) != options.GetBackends().end())
297*3e777be0SXin Li {
298*3e777be0SXin Li if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs))
299*3e777be0SXin Li {
300*3e777be0SXin Li return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
301*3e777be0SXin Li }
302*3e777be0SXin Li
303*3e777be0SXin Li if (clTunedParameters &&
304*3e777be0SXin Li options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
305*3e777be0SXin Li {
306*3e777be0SXin Li // Now that we've done one inference the CL kernel parameters will have been tuned,
307*3e777be0SXin Li // so save the updated file.
308*3e777be0SXin Li try
309*3e777be0SXin Li {
310*3e777be0SXin Li clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
311*3e777be0SXin Li }
312*3e777be0SXin Li catch (std::exception& error)
313*3e777be0SXin Li {
314*3e777be0SXin Li ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
315*3e777be0SXin Li options.GetClTunedParametersFile().c_str(), error.what());
316*3e777be0SXin Li }
317*3e777be0SXin Li }
318*3e777be0SXin Li }
319*3e777be0SXin Li size_t hashValue = 0;
320*3e777be0SXin Li // Cache the model
321*3e777be0SXin Li if (dataCacheHandle.size() > 0)
322*3e777be0SXin Li {
323*3e777be0SXin Li // Cache the Arm NN model
324*3e777be0SXin Li if (dataCacheHandle.size() != 1)
325*3e777be0SXin Li {
326*3e777be0SXin Li NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
327*3e777be0SXin Li return V1_3::ErrorStatus::NONE;
328*3e777be0SXin Li }
329*3e777be0SXin Li
330*3e777be0SXin Li if (dataCacheHandle[0]->numFds != 1)
331*3e777be0SXin Li {
332*3e777be0SXin Li ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, numFds != 1.");
333*3e777be0SXin Li NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
334*3e777be0SXin Li return V1_3::ErrorStatus::NONE;
335*3e777be0SXin Li }
336*3e777be0SXin Li
337*3e777be0SXin Li if (dataCacheHandle[0]->data[0] < 0)
338*3e777be0SXin Li {
339*3e777be0SXin Li ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, fd < 0");
340*3e777be0SXin Li NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
341*3e777be0SXin Li return V1_3::ErrorStatus::NONE;
342*3e777be0SXin Li }
343*3e777be0SXin Li
344*3e777be0SXin Li int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE;
345*3e777be0SXin Li if (dataCacheFileAccessMode != O_RDWR)
346*3e777be0SXin Li {
347*3e777be0SXin Li ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Invalid Access Mode.");
348*3e777be0SXin Li NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
349*3e777be0SXin Li return V1_3::ErrorStatus::NONE;
350*3e777be0SXin Li }
351*3e777be0SXin Li
352*3e777be0SXin Li write(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size());
353*3e777be0SXin Li hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
354*3e777be0SXin Li }
355*3e777be0SXin Li
356*3e777be0SXin Li // Cache the model data
357*3e777be0SXin Li if (modelCacheHandle.size() > 0)
358*3e777be0SXin Li {
359*3e777be0SXin Li if (modelCacheHandle.size() != numberOfCachedModelFiles)
360*3e777be0SXin Li {
361*3e777be0SXin Li NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
362*3e777be0SXin Li return V1_3::ErrorStatus::NONE;
363*3e777be0SXin Li }
364*3e777be0SXin Li
365*3e777be0SXin Li for (uint32_t i = 0; i < modelCacheHandle.size(); ++i)
366*3e777be0SXin Li {
367*3e777be0SXin Li if (modelCacheHandle[i]->numFds == 1)
368*3e777be0SXin Li {
369*3e777be0SXin Li int modelCacheFileAccessMode = fcntl(modelCacheHandle[i]->data[0], F_GETFL) & O_ACCMODE;
370*3e777be0SXin Li if (modelCacheFileAccessMode != O_RDONLY)
371*3e777be0SXin Li {
372*3e777be0SXin Li struct stat statBuffer;
373*3e777be0SXin Li if (fstat(modelCacheHandle[i]->data[0], &statBuffer) == 0)
374*3e777be0SXin Li {
375*3e777be0SXin Li long modelDataSize = statBuffer.st_size;
376*3e777be0SXin Li if (modelDataSize > 0)
377*3e777be0SXin Li {
378*3e777be0SXin Li std::vector<uint8_t> modelData(modelDataSize);
379*3e777be0SXin Li pread(modelCacheHandle[i]->data[0], modelData.data(), modelData.size(), 0);
380*3e777be0SXin Li hashValue ^= CacheDataHandlerInstance().Hash(modelData);
381*3e777be0SXin Li }
382*3e777be0SXin Li }
383*3e777be0SXin Li }
384*3e777be0SXin Li }
385*3e777be0SXin Li }
386*3e777be0SXin Li }
387*3e777be0SXin Li if (hashValue != 0)
388*3e777be0SXin Li {
389*3e777be0SXin Li CacheDataHandlerInstance().Register(token, hashValue, dataCacheData.size());
390*3e777be0SXin Li }
391*3e777be0SXin Li
392*3e777be0SXin Li NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
393*3e777be0SXin Li
394*3e777be0SXin Li ALOGV("ArmnnDriverImpl::prepareModel cache timing = %lld µs", std::chrono::duration_cast<std::chrono::microseconds>
395*3e777be0SXin Li (std::chrono::system_clock::now() - prepareModelTimepoint).count());
396*3e777be0SXin Li
397*3e777be0SXin Li
398*3e777be0SXin Li return V1_3::ErrorStatus::NONE;
399*3e777be0SXin Li }
400*3e777be0SXin Li
prepareModelFromCache_1_3(const armnn::IRuntimePtr & runtime,const DriverOptions & options,const android::hardware::hidl_vec<android::hardware::hidl_handle> & modelCacheHandle,const android::hardware::hidl_vec<android::hardware::hidl_handle> & dataCacheHandle,const HidlToken & token,const android::sp<V1_3::IPreparedModelCallback> & cb)401*3e777be0SXin Li Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareModelFromCache_1_3(
402*3e777be0SXin Li const armnn::IRuntimePtr& runtime,
403*3e777be0SXin Li const DriverOptions& options,
404*3e777be0SXin Li const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
405*3e777be0SXin Li const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
406*3e777be0SXin Li const HidlToken& token,
407*3e777be0SXin Li const android::sp<V1_3::IPreparedModelCallback>& cb)
408*3e777be0SXin Li {
409*3e777be0SXin Li ALOGV("ArmnnDriverImpl::prepareModelFromCache_1_3()");
410*3e777be0SXin Li std::chrono::time_point<std::chrono::system_clock> modelFromCacheTimepoint = std::chrono::system_clock::now();
411*3e777be0SXin Li
412*3e777be0SXin Li if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN)
413*3e777be0SXin Li {
414*3e777be0SXin Li cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
415*3e777be0SXin Li return V1_3::ErrorStatus::GENERAL_FAILURE;
416*3e777be0SXin Li }
417*3e777be0SXin Li
418*3e777be0SXin Li if (cb.get() == nullptr)
419*3e777be0SXin Li {
420*3e777be0SXin Li ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid callback passed to prepareModelFromCache_1_3");
421*3e777be0SXin Li return V1_3::ErrorStatus::INVALID_ARGUMENT;
422*3e777be0SXin Li }
423*3e777be0SXin Li
424*3e777be0SXin Li if (!runtime)
425*3e777be0SXin Li {
426*3e777be0SXin Li ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Device unavailable");
427*3e777be0SXin Li return V1_3::ErrorStatus::DEVICE_UNAVAILABLE;
428*3e777be0SXin Li }
429*3e777be0SXin Li
430*3e777be0SXin Li // DataCacheHandle size should always be 1
431*3e777be0SXin Li // Arm NN model
432*3e777be0SXin Li if (dataCacheHandle.size() != 1)
433*3e777be0SXin Li {
434*3e777be0SXin Li cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
435*3e777be0SXin Li return V1_3::ErrorStatus::GENERAL_FAILURE;
436*3e777be0SXin Li }
437*3e777be0SXin Li
438*3e777be0SXin Li // Check if model files cached they match the expected value
439*3e777be0SXin Li unsigned int numberOfCachedModelFiles = 0;
440*3e777be0SXin Li for (auto& backend : options.GetBackends())
441*3e777be0SXin Li {
442*3e777be0SXin Li numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
443*3e777be0SXin Li }
444*3e777be0SXin Li if (modelCacheHandle.size() != numberOfCachedModelFiles)
445*3e777be0SXin Li {
446*3e777be0SXin Li cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
447*3e777be0SXin Li return V1_3::ErrorStatus::GENERAL_FAILURE;
448*3e777be0SXin Li }
449*3e777be0SXin Li
450*3e777be0SXin Li if (dataCacheHandle[0]->numFds != 1)
451*3e777be0SXin Li {
452*3e777be0SXin Li ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Cannot read from the cache data, numFds != 1.");
453*3e777be0SXin Li cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
454*3e777be0SXin Li return V1_3::ErrorStatus::GENERAL_FAILURE;
455*3e777be0SXin Li }
456*3e777be0SXin Li
457*3e777be0SXin Li if (dataCacheHandle[0]->data[0] < 0)
458*3e777be0SXin Li {
459*3e777be0SXin Li ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Cannot read from the cache data, fd < 0");
460*3e777be0SXin Li cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
461*3e777be0SXin Li return V1_3::ErrorStatus::GENERAL_FAILURE;
462*3e777be0SXin Li }
463*3e777be0SXin Li
464*3e777be0SXin Li int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE;
465*3e777be0SXin Li if (dataCacheFileAccessMode != O_RDWR)
466*3e777be0SXin Li {
467*3e777be0SXin Li cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
468*3e777be0SXin Li return V1_3::ErrorStatus::GENERAL_FAILURE;
469*3e777be0SXin Li }
470*3e777be0SXin Li
471*3e777be0SXin Li auto dataSize = CacheDataHandlerInstance().GetCacheSize(token);
472*3e777be0SXin Li if (dataSize == 0)
473*3e777be0SXin Li {
474*3e777be0SXin Li ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid data to deserialize!");
475*3e777be0SXin Li cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
476*3e777be0SXin Li return V1_3::ErrorStatus::GENERAL_FAILURE;
477*3e777be0SXin Li }
478*3e777be0SXin Li
479*3e777be0SXin Li int offset = 0;
480*3e777be0SXin Li {
481*3e777be0SXin Li struct stat statBuffer;
482*3e777be0SXin Li if (fstat(dataCacheHandle[0]->data[0], &statBuffer) == 0)
483*3e777be0SXin Li {
484*3e777be0SXin Li unsigned long bufferSize = statBuffer.st_size;
485*3e777be0SXin Li if (bufferSize != dataSize)
486*3e777be0SXin Li {
487*3e777be0SXin Li ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid data to deserialize!");
488*3e777be0SXin Li cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
489*3e777be0SXin Li return V1_3::ErrorStatus::GENERAL_FAILURE;
490*3e777be0SXin Li }
491*3e777be0SXin Li }
492*3e777be0SXin Li }
493*3e777be0SXin Li std::vector<uint8_t> dataCacheData(dataSize);
494*3e777be0SXin Li pread(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size(), offset);
495*3e777be0SXin Li auto hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
496*3e777be0SXin Li
497*3e777be0SXin Li int gpuAccCachedFd = -1;
498*3e777be0SXin Li bool saveCachedNetwork = false;
499*3e777be0SXin Li if (modelCacheHandle.size() > 0)
500*3e777be0SXin Li {
501*3e777be0SXin Li unsigned int index = 0;
502*3e777be0SXin Li for (auto& backend : options.GetBackends())
503*3e777be0SXin Li {
504*3e777be0SXin Li // modelCacheHandle size should be equal to numberOfCachedModelFiles
505*3e777be0SXin Li // modelCacheHandle vector should be in same order as backends
506*3e777be0SXin Li auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
507*3e777be0SXin Li if (numberOfCacheFiles > 0)
508*3e777be0SXin Li {
509*3e777be0SXin Li if (modelCacheHandle[index]->numFds != 1)
510*3e777be0SXin Li {
511*3e777be0SXin Li ALOGW(
512*3e777be0SXin Li "ArmnnDriverImpl::prepareModelFromCache_1_3(): Cannot read from the model cache, numFds != 1.");
513*3e777be0SXin Li cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
514*3e777be0SXin Li return V1_3::ErrorStatus::GENERAL_FAILURE;
515*3e777be0SXin Li }
516*3e777be0SXin Li auto cachedFd = modelCacheHandle[index]->data[0];
517*3e777be0SXin Li
518*3e777be0SXin Li int modelCacheFileAccessMode = fcntl(cachedFd, F_GETFL) & O_ACCMODE;
519*3e777be0SXin Li if (modelCacheFileAccessMode != O_RDWR)
520*3e777be0SXin Li {
521*3e777be0SXin Li cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
522*3e777be0SXin Li return V1_3::ErrorStatus::GENERAL_FAILURE;
523*3e777be0SXin Li }
524*3e777be0SXin Li
525*3e777be0SXin Li struct stat statBuffer;
526*3e777be0SXin Li if (cachedFd != -1 && fstat(cachedFd, &statBuffer) == 0)
527*3e777be0SXin Li {
528*3e777be0SXin Li long modelDataSize = statBuffer.st_size;
529*3e777be0SXin Li if (modelDataSize <= 0)
530*3e777be0SXin Li {
531*3e777be0SXin Li ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Wrong cached model size!");
532*3e777be0SXin Li cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
533*3e777be0SXin Li return V1_3::ErrorStatus::NONE;
534*3e777be0SXin Li }
535*3e777be0SXin Li std::vector<uint8_t> modelData(modelDataSize);
536*3e777be0SXin Li pread(cachedFd, modelData.data(), modelData.size(), 0);
537*3e777be0SXin Li hashValue ^= CacheDataHandlerInstance().Hash(modelData);
538*3e777be0SXin Li
539*3e777be0SXin Li // For GpuAcc numberOfCachedFiles is 1
540*3e777be0SXin Li if (backend == armnn::Compute::GpuAcc)
541*3e777be0SXin Li {
542*3e777be0SXin Li gpuAccCachedFd = cachedFd;
543*3e777be0SXin Li }
544*3e777be0SXin Li }
545*3e777be0SXin Li index += numberOfCacheFiles;
546*3e777be0SXin Li }
547*3e777be0SXin Li }
548*3e777be0SXin Li }
549*3e777be0SXin Li
550*3e777be0SXin Li if (!CacheDataHandlerInstance().Validate(token, hashValue, dataCacheData.size()))
551*3e777be0SXin Li {
552*3e777be0SXin Li ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: ValidateHash() failed!");
553*3e777be0SXin Li cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
554*3e777be0SXin Li return V1_3::ErrorStatus::GENERAL_FAILURE;
555*3e777be0SXin Li }
556*3e777be0SXin Li
557*3e777be0SXin Li // Deserialize the network..
558*3e777be0SXin Li armnn::INetworkPtr network = armnn::INetworkPtr(nullptr, [](armnn::INetwork*){});
559*3e777be0SXin Li try
560*3e777be0SXin Li {
561*3e777be0SXin Li network = armnnDeserializer::IDeserializer::Create()->CreateNetworkFromBinary(dataCacheData);
562*3e777be0SXin Li }
563*3e777be0SXin Li catch (std::exception&)
564*3e777be0SXin Li {
565*3e777be0SXin Li ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Exception caught from Deserializer!");
566*3e777be0SXin Li cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
567*3e777be0SXin Li return V1_3::ErrorStatus::GENERAL_FAILURE;
568*3e777be0SXin Li }
569*3e777be0SXin Li
570*3e777be0SXin Li // Optimize the network
571*3e777be0SXin Li armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
572*3e777be0SXin Li armnn::OptimizerOptionsOpaque OptOptions;
573*3e777be0SXin Li OptOptions.SetReduceFp32ToFp16(options.GetFp16Enabled());
574*3e777be0SXin Li OptOptions.SetProfilingEnabled(options.IsGpuProfilingEnabled());
575*3e777be0SXin Li
576*3e777be0SXin Li armnn::BackendOptions gpuAcc("GpuAcc",
577*3e777be0SXin Li {
578*3e777be0SXin Li {"FastMathEnabled", options.IsFastMathEnabled()},
579*3e777be0SXin Li {"SaveCachedNetwork", saveCachedNetwork},
580*3e777be0SXin Li {"CachedNetworkFilePath", options.GetCachedNetworkFilePath()},
581*3e777be0SXin Li {"MLGOTuningFilePath", options.GetClMLGOTunedParametersFile()},
582*3e777be0SXin Li {"CachedFileDescriptor", gpuAccCachedFd}
583*3e777be0SXin Li });
584*3e777be0SXin Li
585*3e777be0SXin Li armnn::BackendOptions cpuAcc("CpuAcc",
586*3e777be0SXin Li {
587*3e777be0SXin Li {"FastMathEnabled", options.IsFastMathEnabled()},
588*3e777be0SXin Li {"NumberOfThreads", options.GetNumberOfThreads()}
589*3e777be0SXin Li });
590*3e777be0SXin Li OptOptions.AddModelOption(gpuAcc);
591*3e777be0SXin Li OptOptions.AddModelOption(cpuAcc);
592*3e777be0SXin Li
593*3e777be0SXin Li std::vector<std::string> errMessages;
594*3e777be0SXin Li try
595*3e777be0SXin Li {
596*3e777be0SXin Li optNet = armnn::Optimize(*network.get(),
597*3e777be0SXin Li options.GetBackends(),
598*3e777be0SXin Li runtime->GetDeviceSpec(),
599*3e777be0SXin Li OptOptions,
600*3e777be0SXin Li errMessages);
601*3e777be0SXin Li }
602*3e777be0SXin Li catch (std::exception& e)
603*3e777be0SXin Li {
604*3e777be0SXin Li std::stringstream message;
605*3e777be0SXin Li message << "Exception (" << e.what() << ") caught from optimize.";
606*3e777be0SXin Li FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
607*3e777be0SXin Li return V1_3::ErrorStatus::NONE;
608*3e777be0SXin Li }
609*3e777be0SXin Li
610*3e777be0SXin Li // Check that the optimized network is valid.
611*3e777be0SXin Li if (!optNet)
612*3e777be0SXin Li {
613*3e777be0SXin Li std::stringstream message;
614*3e777be0SXin Li message << "Invalid optimized network";
615*3e777be0SXin Li for (const std::string& msg : errMessages)
616*3e777be0SXin Li {
617*3e777be0SXin Li message << "\n" << msg;
618*3e777be0SXin Li }
619*3e777be0SXin Li FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
620*3e777be0SXin Li return V1_3::ErrorStatus::NONE;
621*3e777be0SXin Li }
622*3e777be0SXin Li
623*3e777be0SXin Li // Export the optimized network graph to a dot file if an output dump directory
624*3e777be0SXin Li // has been specified in the drivers' arguments.
625*3e777be0SXin Li std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
626*3e777be0SXin Li options.GetRequestInputsAndOutputsDumpDir());
627*3e777be0SXin Li
628*3e777be0SXin Li // Load it into the runtime.
629*3e777be0SXin Li armnn::NetworkId netId = 0;
630*3e777be0SXin Li std::string msg;
631*3e777be0SXin Li armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
632*3e777be0SXin Li MemorySource::Undefined,
633*3e777be0SXin Li MemorySource::Undefined,
634*3e777be0SXin Li options.IsGpuProfilingEnabled());
635*3e777be0SXin Li
636*3e777be0SXin Li try
637*3e777be0SXin Li {
638*3e777be0SXin Li if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
639*3e777be0SXin Li {
640*3e777be0SXin Li return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, msg, cb);
641*3e777be0SXin Li }
642*3e777be0SXin Li }
643*3e777be0SXin Li catch (std::exception& e)
644*3e777be0SXin Li {
645*3e777be0SXin Li std::stringstream message;
646*3e777be0SXin Li message << "Exception (" << e.what() << ") caught from LoadNetwork.";
647*3e777be0SXin Li FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
648*3e777be0SXin Li return V1_3::ErrorStatus::NONE;
649*3e777be0SXin Li }
650*3e777be0SXin Li
651*3e777be0SXin Li std::unique_ptr<ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>> preparedModel(
652*3e777be0SXin Li new ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>(netId,
653*3e777be0SXin Li runtime.get(),
654*3e777be0SXin Li options.GetRequestInputsAndOutputsDumpDir(),
655*3e777be0SXin Li options.IsGpuProfilingEnabled(),
656*3e777be0SXin Li V1_3::Priority::MEDIUM,
657*3e777be0SXin Li options.isAsyncModelExecutionEnabled(),
658*3e777be0SXin Li options.getNoOfArmnnThreads(),
659*3e777be0SXin Li options.isImportEnabled(),
660*3e777be0SXin Li options.isExportEnabled(),
661*3e777be0SXin Li true));
662*3e777be0SXin Li
663*3e777be0SXin Li NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
664*3e777be0SXin Li
665*3e777be0SXin Li ALOGV("ArmnnDriverImpl::prepareModelFromCache timing = %lld µs",
666*3e777be0SXin Li std::chrono::duration_cast<std::chrono::microseconds>
667*3e777be0SXin Li (std::chrono::system_clock::now() - modelFromCacheTimepoint).count());
668*3e777be0SXin Li
669*3e777be0SXin Li return V1_3::ErrorStatus::NONE;
670*3e777be0SXin Li }
671*3e777be0SXin Li
getCapabilities_1_3(const armnn::IRuntimePtr & runtime,V1_3::IDevice::getCapabilities_1_3_cb cb)672*3e777be0SXin Li Return<void> ArmnnDriverImpl::getCapabilities_1_3(const armnn::IRuntimePtr& runtime,
673*3e777be0SXin Li V1_3::IDevice::getCapabilities_1_3_cb cb)
674*3e777be0SXin Li {
675*3e777be0SXin Li ALOGV("hal_1_3::ArmnnDriverImpl::getCapabilities()");
676*3e777be0SXin Li
677*3e777be0SXin Li V1_3::Capabilities capabilities;
678*3e777be0SXin Li
679*3e777be0SXin Li float defaultValue = .1f;
680*3e777be0SXin Li
681*3e777be0SXin Li if (runtime)
682*3e777be0SXin Li {
683*3e777be0SXin Li capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime =
684*3e777be0SXin Li ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue);
685*3e777be0SXin Li
686*3e777be0SXin Li capabilities.relaxedFloat32toFloat16PerformanceScalar.powerUsage =
687*3e777be0SXin Li ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue);
688*3e777be0SXin Li
689*3e777be0SXin Li capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime =
690*3e777be0SXin Li ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue);
691*3e777be0SXin Li
692*3e777be0SXin Li capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage =
693*3e777be0SXin Li ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue);
694*3e777be0SXin Li
695*3e777be0SXin Li capabilities.ifPerformance.execTime =
696*3e777be0SXin Li ParseSystemProperty(g_ifPerformanceExecTime, defaultValue);
697*3e777be0SXin Li
698*3e777be0SXin Li capabilities.ifPerformance.powerUsage =
699*3e777be0SXin Li ParseSystemProperty(g_ifPerformancePowerUsage, defaultValue);
700*3e777be0SXin Li
701*3e777be0SXin Li capabilities.whilePerformance.execTime =
702*3e777be0SXin Li ParseSystemProperty(g_whilePerformanceExecTime, defaultValue);
703*3e777be0SXin Li
704*3e777be0SXin Li capabilities.whilePerformance.powerUsage =
705*3e777be0SXin Li ParseSystemProperty(g_whilePerformancePowerUsage, defaultValue);
706*3e777be0SXin Li
707*3e777be0SXin Li // Set the base value for all operand types
708*3e777be0SXin Li capabilities.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_3>({FLT_MAX, FLT_MAX});
709*3e777be0SXin Li
710*3e777be0SXin Li // Load supported operand types
711*3e777be0SXin Li update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_FLOAT32,
712*3e777be0SXin Li {
713*3e777be0SXin Li .execTime = ParseSystemProperty(g_OperandTypeTensorFloat32PerformanceExecTime, defaultValue),
714*3e777be0SXin Li .powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat32PerformancePowerUsage, defaultValue)
715*3e777be0SXin Li });
716*3e777be0SXin Li
717*3e777be0SXin Li update(&capabilities.operandPerformance, V1_3::OperandType::FLOAT32,
718*3e777be0SXin Li {
719*3e777be0SXin Li .execTime = ParseSystemProperty(g_OperandTypeFloat32PerformanceExecTime, defaultValue),
720*3e777be0SXin Li .powerUsage = ParseSystemProperty(g_OperandTypeFloat32PerformancePowerUsage, defaultValue)
721*3e777be0SXin Li });
722*3e777be0SXin Li
723*3e777be0SXin Li update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_FLOAT16,
724*3e777be0SXin Li {
725*3e777be0SXin Li .execTime = ParseSystemProperty(g_OperandTypeTensorFloat16PerformanceExecTime, defaultValue),
726*3e777be0SXin Li .powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat16PerformancePowerUsage, defaultValue)
727*3e777be0SXin Li });
728*3e777be0SXin Li
729*3e777be0SXin Li update(&capabilities.operandPerformance, V1_3::OperandType::FLOAT16,
730*3e777be0SXin Li {
731*3e777be0SXin Li .execTime = ParseSystemProperty(g_OperandTypeFloat16PerformanceExecTime, defaultValue),
732*3e777be0SXin Li .powerUsage = ParseSystemProperty(g_OperandTypeFloat16PerformancePowerUsage, defaultValue)
733*3e777be0SXin Li });
734*3e777be0SXin Li
735*3e777be0SXin Li update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_ASYMM,
736*3e777be0SXin Li {
737*3e777be0SXin Li .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformanceExecTime, defaultValue),
738*3e777be0SXin Li .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformancePowerUsage, defaultValue)
739*3e777be0SXin Li });
740*3e777be0SXin Li
741*3e777be0SXin Li update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_SYMM,
742*3e777be0SXin Li {
743*3e777be0SXin Li .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformanceExecTime, defaultValue),
744*3e777be0SXin Li .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformancePowerUsage, defaultValue)
745*3e777be0SXin Li });
746*3e777be0SXin Li update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_ASYMM_SIGNED,
747*3e777be0SXin Li {
748*3e777be0SXin Li .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmSignedPerformanceExecTime,
749*3e777be0SXin Li defaultValue),
750*3e777be0SXin Li .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmSignedPerformancePowerUsage,
751*3e777be0SXin Li defaultValue)
752*3e777be0SXin Li });
753*3e777be0SXin Li
754*3e777be0SXin Li update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT16_SYMM,
755*3e777be0SXin Li {
756*3e777be0SXin Li .execTime = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformanceExecTime, defaultValue),
757*3e777be0SXin Li .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformancePowerUsage, defaultValue)
758*3e777be0SXin Li });
759*3e777be0SXin Li
760*3e777be0SXin Li update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
761*3e777be0SXin Li {
762*3e777be0SXin Li .execTime =
763*3e777be0SXin Li ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime, defaultValue),
764*3e777be0SXin Li .powerUsage =
765*3e777be0SXin Li ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage, defaultValue)
766*3e777be0SXin Li });
767*3e777be0SXin Li
768*3e777be0SXin Li update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_INT32,
769*3e777be0SXin Li {
770*3e777be0SXin Li .execTime = ParseSystemProperty(g_OperandTypeTensorInt32PerformanceExecTime, defaultValue),
771*3e777be0SXin Li .powerUsage = ParseSystemProperty(g_OperandTypeTensorInt32PerformancePowerUsage, defaultValue)
772*3e777be0SXin Li });
773*3e777be0SXin Li
774*3e777be0SXin Li update(&capabilities.operandPerformance, V1_3::OperandType::INT32,
775*3e777be0SXin Li {
776*3e777be0SXin Li .execTime = ParseSystemProperty(g_OperandTypeInt32PerformanceExecTime, defaultValue),
777*3e777be0SXin Li .powerUsage = ParseSystemProperty(g_OperandTypeInt32PerformancePowerUsage, defaultValue)
778*3e777be0SXin Li });
779*3e777be0SXin Li
780*3e777be0SXin Li cb(V1_3::ErrorStatus::NONE, capabilities);
781*3e777be0SXin Li }
782*3e777be0SXin Li else
783*3e777be0SXin Li {
784*3e777be0SXin Li capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime = 0;
785*3e777be0SXin Li capabilities.relaxedFloat32toFloat16PerformanceScalar.powerUsage = 0;
786*3e777be0SXin Li capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime = 0;
787*3e777be0SXin Li capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage = 0;
788*3e777be0SXin Li capabilities.ifPerformance.execTime = 0;
789*3e777be0SXin Li capabilities.ifPerformance.powerUsage = 0;
790*3e777be0SXin Li capabilities.whilePerformance.execTime = 0;
791*3e777be0SXin Li capabilities.whilePerformance.powerUsage = 0;
792*3e777be0SXin Li
793*3e777be0SXin Li // Set the base value for all operand types
794*3e777be0SXin Li capabilities.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_3>({0.f, 0.0f});
795*3e777be0SXin Li
796*3e777be0SXin Li cb(V1_3::ErrorStatus::DEVICE_UNAVAILABLE, capabilities);
797*3e777be0SXin Li }
798*3e777be0SXin Li
799*3e777be0SXin Li return Void();
800*3e777be0SXin Li }
801*3e777be0SXin Li
802*3e777be0SXin Li } // namespace hal_1_3
803*3e777be0SXin Li } // namespace armnn_driver