xref: /aosp_15_r20/external/android-nn-driver/1.3/ArmnnDriverImpl.cpp (revision 3e777be0405cee09af5d5785ff37f7cfb5bee59a)
//
// Copyright © 2020, 2023 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "ArmnnDriverImpl.hpp"
#include "../ArmnnPreparedModel_1_3.hpp"
#include "../ModelToINetworkConverter.hpp"
#include "../SystemPropertiesUtils.hpp"

#include <armnnDeserializer/IDeserializer.hpp>

#include <log/log.h>

#include <sys/stat.h>
#include <chrono>

namespace
{
const char *g_RelaxedFloat32toFloat16PerformanceExecTime    = "ArmNN.relaxedFloat32toFloat16Performance.execTime";
const char *g_RelaxedFloat32toFloat16PerformancePowerUsage  = "ArmNN.relaxedFloat32toFloat16Performance.powerUsage";

const char *g_ifPerformanceExecTime                         = "ArmNN.ifPerformance.execTime";
const char *g_ifPerformancePowerUsage                       = "ArmNN.ifPerformance.powerUsage";

const char *g_whilePerformanceExecTime                      = "ArmNN.whilePerformance.execTime";
const char *g_whilePerformancePowerUsage                    = "ArmNN.whilePerformance.powerUsage";

const char *g_OperandTypeTensorFloat32PerformanceExecTime   = "Armnn.operandTypeTensorFloat32Performance.execTime";
const char *g_OperandTypeTensorFloat32PerformancePowerUsage = "Armnn.operandTypeTensorFloat32Performance.powerUsage";

const char *g_OperandTypeFloat32PerformanceExecTime         = "Armnn.operandTypeFloat32Performance.execTime";
const char *g_OperandTypeFloat32PerformancePowerUsage       = "Armnn.operandTypeFloat32Performance.powerUsage";

const char *g_OperandTypeTensorFloat16PerformanceExecTime   = "Armnn.operandTypeTensorFloat16Performance.execTime";
const char *g_OperandTypeTensorFloat16PerformancePowerUsage = "Armnn.operandTypeTensorFloat16Performance.powerUsage";

const char *g_OperandTypeFloat16PerformanceExecTime         = "Armnn.operandTypeFloat16Performance.execTime";
const char *g_OperandTypeFloat16PerformancePowerUsage       = "Armnn.operandTypeFloat16Performance.powerUsage";

const char *g_OperandTypeTensorQuant8AsymmPerformanceExecTime =
        "Armnn.operandTypeTensorQuant8AsymmPerformance.execTime";
const char *g_OperandTypeTensorQuant8AsymmPerformancePowerUsage =
        "Armnn.operandTypeTensorQuant8AsymmPerformance.powerUsage";

const char *g_OperandTypeTensorQuant8AsymmSignedPerformanceExecTime =
    "Armnn.operandTypeTensorQuant8AsymmSignedPerformance.execTime";
const char *g_OperandTypeTensorQuant8AsymmSignedPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant8AsymmSignedPerformance.powerUsage";

const char *g_OperandTypeTensorQuant16SymmPerformanceExecTime =
        "Armnn.operandTypeTensorQuant16SymmPerformance.execTime";
const char *g_OperandTypeTensorQuant16SymmPerformancePowerUsage =
        "Armnn.operandTypeTensorQuant16SymmPerformance.powerUsage";

const char *g_OperandTypeTensorQuant8SymmPerformanceExecTime =
        "Armnn.operandTypeTensorQuant8SymmPerformance.execTime";
const char *g_OperandTypeTensorQuant8SymmPerformancePowerUsage =
        "Armnn.operandTypeTensorQuant8SymmPerformance.powerUsage";

const char *g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime =
    "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.execTime";
const char *g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.powerUsage";


const char *g_OperandTypeTensorInt32PerformanceExecTime     = "Armnn.operandTypeTensorInt32Performance.execTime";
const char *g_OperandTypeTensorInt32PerformancePowerUsage   = "Armnn.operandTypeTensorInt32Performance.powerUsage";

const char *g_OperandTypeInt32PerformanceExecTime           = "Armnn.operandTypeInt32Performance.execTime";
const char *g_OperandTypeInt32PerformancePowerUsage         = "Armnn.operandTypeInt32Performance.powerUsage";

void NotifyCallbackAndCheck(const android::sp<V1_3::IPreparedModelCallback>& callback,
                            V1_3::ErrorStatus errorStatus,
                            const android::sp<V1_3::IPreparedModel>& preparedModelPtr)
{
    Return<void> returned = callback->notify_1_3(errorStatus, preparedModelPtr);
    // This check is required: if the callback fails and the failure isn't checked, it will bring down the service.
    if (!returned.isOk())
    {
        ALOGE("ArmnnDriverImpl::prepareModel: hidl callback failed to return properly: %s ",
              returned.description().c_str());
    }
}

Return<V1_3::ErrorStatus> FailPrepareModel(V1_3::ErrorStatus error,
                                           const std::string& message,
                                           const android::sp<V1_3::IPreparedModelCallback>& callback)
{
    ALOGW("ArmnnDriverImpl::prepareModel: %s", message.c_str());
    NotifyCallbackAndCheck(callback, error, nullptr);
    return error;
}

} // anonymous namespace

namespace armnn_driver
{
namespace hal_1_3
{

Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
       const armnn::IRuntimePtr& runtime,
       const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
       const DriverOptions& options,
       const V1_3::Model& model,
       const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
       const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
       const HidlToken& token,
       const android::sp<V1_3::IPreparedModelCallback>& cb,
       bool float32ToFloat16,
       V1_3::Priority priority)
{
    ALOGV("ArmnnDriverImpl::prepareArmnnModel_1_3()");

    std::chrono::time_point<std::chrono::system_clock> prepareModelTimepoint = std::chrono::system_clock::now();

    if (cb.get() == nullptr)
    {
        ALOGW("ArmnnDriverImpl::prepareModel: Invalid callback passed to prepareModel");
        return V1_3::ErrorStatus::INVALID_ARGUMENT;
    }

    if (!runtime)
    {
        return FailPrepareModel(V1_3::ErrorStatus::DEVICE_UNAVAILABLE, "Device unavailable", cb);
    }

    if (!android::nn::validateModel(model))
    {
        return FailPrepareModel(V1_3::ErrorStatus::INVALID_ARGUMENT, "Invalid model passed as input", cb);
    }

    // Deliberately ignore any unsupported operations requested by the options -
    // at this point we're being asked to prepare a model that we've already declared support for
    // and the operation indices may be different to those in getSupportedOperations anyway.
    std::set<unsigned int> unsupportedOperations;
    ModelToINetworkConverter<HalPolicy> modelConverter(options.GetBackends(),
                                                       model,
                                                       unsupportedOperations);

    if (modelConverter.GetConversionResult() != ConversionResult::Success)
    {
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "ModelToINetworkConverter failed", cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Serialize the network graph to a .armnn file if an output directory
    // has been specified in the driver's arguments.
    std::vector<uint8_t> dataCacheData;
    bool serializeToFile = dataCacheHandle.size() < 1 ? false : true;
    auto serializedNetworkFileName =
        SerializeNetwork(*modelConverter.GetINetwork(),
                         options.GetRequestInputsAndOutputsDumpDir(),
                         dataCacheData,
                         serializeToFile);

    // Optimize the network
    armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
    armnn::OptimizerOptionsOpaque OptOptions;
    OptOptions.SetReduceFp32ToFp16(float32ToFloat16);
    OptOptions.SetProfilingEnabled(options.IsGpuProfilingEnabled());

    int cachedFd = -1;
    bool saveCachedNetwork = options.SaveCachedNetwork();

    unsigned int numberOfCachedModelFiles = 0;
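    // If the NNAPI runtime supplied model cache handles, walk the backends in the configured order to
    // count the cache files each backend expects and to pick up the file descriptor that belongs to
    // GpuAcc; that descriptor is handed to the GpuAcc backend options below so the compiled network
    // can be written into the cache.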
    if (modelCacheHandle.size() > 0)
    {
        unsigned int index = 0;
        for (auto& backend : options.GetBackends())
        {
            // modelCacheHandle size should be equal to numberOfCachedModelFiles
            // modelCacheHandle vector should be in same order as backends
            auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
            if (numberOfCacheFiles > 0)
            {
                numberOfCachedModelFiles += numberOfCacheFiles;
                if (modelCacheHandle[index]->numFds == 1)
                {
                    // For GpuAcc numberOfCachedFiles is 1
                    if (backend == armnn::Compute::GpuAcc)
                    {
                        cachedFd = modelCacheHandle[index]->data[0];
                        saveCachedNetwork = true;
                    }
                }
                index += numberOfCachedModelFiles;
            }
        }
    }

    armnn::BackendOptions gpuAcc("GpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() },
        { "SaveCachedNetwork", saveCachedNetwork },
        { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
        { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
        { "CachedFileDescriptor", cachedFd }
    });

    armnn::BackendOptions cpuAcc("CpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() },
        { "NumberOfThreads", options.GetNumberOfThreads() }
    });
    OptOptions.AddModelOption(gpuAcc);
    OptOptions.AddModelOption(cpuAcc);

    std::vector<std::string> errMessages;
    try
    {
        optNet = armnn::Optimize(*modelConverter.GetINetwork(),
                                 options.GetBackends(),
                                 runtime->GetDeviceSpec(),
                                 OptOptions,
                                 errMessages);
    }
    catch (std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from optimize.";
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Check that the optimized network is valid.
    if (!optNet)
    {
        std::stringstream message;
        message << "Invalid optimized network";
        for (const std::string& msg : errMessages)
        {
            message << "\n" << msg;
        }
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Export the optimized network graph to a dot file if an output dump directory
    // has been specified in the driver's arguments.
    std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
                                                               options.GetRequestInputsAndOutputsDumpDir());

    // Load it into the runtime.
    armnn::NetworkId netId = 0;
    std::string msg;
    armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
                                                MemorySource::Undefined,
                                                MemorySource::Undefined,
                                                options.IsGpuProfilingEnabled());

    auto numInputs  = getMainModel(model).inputIndexes.size();
    auto numOutputs = getMainModel(model).outputIndexes.size();
    try
    {
        if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
        {
            return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be loaded", cb);
        }
    }
    catch (std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from LoadNetwork.";
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Now that we have a networkId for the graph, rename the exported files to use it
    // so that we can associate the graph file and the input/output tensor exported files.
    RenameExportedFiles(serializedNetworkFileName,
                        dotGraphFileName,
                        options.GetRequestInputsAndOutputsDumpDir(),
                        netId);

    std::unique_ptr<ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>> preparedModel(
            new ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>(
                    netId,
                    runtime.get(),
                    model,
                    options.GetRequestInputsAndOutputsDumpDir(),
                    options.IsGpuProfilingEnabled(),
                    priority,
                    options.isAsyncModelExecutionEnabled(),
                    options.getNoOfArmnnThreads(),
                    options.isImportEnabled(),
                    options.isExportEnabled()));

    // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
    // this is enabled) before the first 'real' inference which removes the overhead of the first inference.
    // Only run this if the GpuAcc backend has been added to options
    if (std::find(options.GetBackends().begin(),
                  options.GetBackends().end(),
                  armnn::Compute::GpuAcc) != options.GetBackends().end())
    {
        if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs))
        {
            return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
        }

        if (clTunedParameters &&
            options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
        {
            // Now that we've done one inference the CL kernel parameters will have been tuned,
            // so save the updated file.
            try
            {
                clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
            }
            catch (std::exception& error)
            {
                ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
                      options.GetClTunedParametersFile().c_str(), error.what());
            }
        }
    }
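
    // Combine a hash of the serialized Arm NN network with hashes of any backend model-cache files and
    // register it against the caller's token, so that prepareModelFromCache_1_3 can later validate that
    // the cached data it is asked to reload matches what was written here.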
    size_t hashValue = 0;
    // Cache the model
    if (dataCacheHandle.size() > 0)
    {
        // Cache the Arm NN model
        if (dataCacheHandle.size() != 1)
        {
            NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
            return V1_3::ErrorStatus::NONE;
        }

        if (dataCacheHandle[0]->numFds != 1)
        {
            ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, numFds != 1.");
            NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
            return V1_3::ErrorStatus::NONE;
        }

        if (dataCacheHandle[0]->data[0] < 0)
        {
            ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, fd < 0");
            NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
            return V1_3::ErrorStatus::NONE;
        }

        int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE;
        if (dataCacheFileAccessMode != O_RDWR)
        {
            ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Invalid Access Mode.");
            NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
            return V1_3::ErrorStatus::NONE;
        }

        write(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size());
        hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
    }

    // Cache the model data
    if (modelCacheHandle.size() > 0)
    {
        if (modelCacheHandle.size() != numberOfCachedModelFiles)
        {
            NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
            return V1_3::ErrorStatus::NONE;
        }

        for (uint32_t i = 0; i < modelCacheHandle.size(); ++i)
        {
            if (modelCacheHandle[i]->numFds == 1)
            {
                int modelCacheFileAccessMode = fcntl(modelCacheHandle[i]->data[0], F_GETFL) & O_ACCMODE;
                if (modelCacheFileAccessMode != O_RDONLY)
                {
                    struct stat statBuffer;
                    if (fstat(modelCacheHandle[i]->data[0], &statBuffer) == 0)
                    {
                        long modelDataSize = statBuffer.st_size;
                        if (modelDataSize > 0)
                        {
                            std::vector<uint8_t> modelData(modelDataSize);
                            pread(modelCacheHandle[i]->data[0], modelData.data(), modelData.size(), 0);
                            hashValue ^= CacheDataHandlerInstance().Hash(modelData);
                        }
                    }
                }
            }
        }
    }
    if (hashValue != 0)
    {
        CacheDataHandlerInstance().Register(token, hashValue, dataCacheData.size());
    }

    NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());

    ALOGV("ArmnnDriverImpl::prepareModel cache timing = %lld µs", std::chrono::duration_cast<std::chrono::microseconds>
         (std::chrono::system_clock::now() - prepareModelTimepoint).count());


    return V1_3::ErrorStatus::NONE;
}

Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareModelFromCache_1_3(
    const armnn::IRuntimePtr& runtime,
    const DriverOptions& options,
    const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
    const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
    const HidlToken& token,
    const android::sp<V1_3::IPreparedModelCallback>& cb)
{
    ALOGV("ArmnnDriverImpl::prepareModelFromCache_1_3()");
    std::chrono::time_point<std::chrono::system_clock> modelFromCacheTimepoint = std::chrono::system_clock::now();

    if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN)
    {
        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
        return V1_3::ErrorStatus::GENERAL_FAILURE;
    }

    if (cb.get() == nullptr)
    {
        ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid callback passed to prepareModelFromCache_1_3");
        return V1_3::ErrorStatus::INVALID_ARGUMENT;
    }

    if (!runtime)
    {
        ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Device unavailable");
        return V1_3::ErrorStatus::DEVICE_UNAVAILABLE;
    }

    // DataCacheHandle size should always be 1
    // Arm NN model
    if (dataCacheHandle.size() != 1)
    {
        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
        return V1_3::ErrorStatus::GENERAL_FAILURE;
    }

    // Check that the number of cached model files matches the number expected for the configured backends
    unsigned int numberOfCachedModelFiles = 0;
    for (auto& backend : options.GetBackends())
    {
        numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
    }
    if (modelCacheHandle.size() != numberOfCachedModelFiles)
    {
        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
        return V1_3::ErrorStatus::GENERAL_FAILURE;
    }

    if (dataCacheHandle[0]->numFds != 1)
    {
        ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Cannot read from the cache data, numFds != 1.");
        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
        return V1_3::ErrorStatus::GENERAL_FAILURE;
    }

    if (dataCacheHandle[0]->data[0] < 0)
    {
        ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Cannot read from the cache data, fd < 0");
        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
        return V1_3::ErrorStatus::GENERAL_FAILURE;
    }

    int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE;
    if (dataCacheFileAccessMode != O_RDWR)
    {
        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
        return V1_3::ErrorStatus::GENERAL_FAILURE;
    }

    auto dataSize = CacheDataHandlerInstance().GetCacheSize(token);
    if (dataSize == 0)
    {
        ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid data to deserialize!");
        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
        return V1_3::ErrorStatus::GENERAL_FAILURE;
    }

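    // Check that the size of the data cache file on disk matches the size that was registered for this
    // token when the model was originally prepared, before reading the serialized network back in.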
    int offset = 0;
    {
        struct stat statBuffer;
        if (fstat(dataCacheHandle[0]->data[0], &statBuffer) == 0)
        {
            unsigned long bufferSize = statBuffer.st_size;
            if (bufferSize != dataSize)
            {
                ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid data to deserialize!");
                cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
                return V1_3::ErrorStatus::GENERAL_FAILURE;
            }
        }
    }
    std::vector<uint8_t> dataCacheData(dataSize);
    pread(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size(), offset);
    auto hashValue = CacheDataHandlerInstance().Hash(dataCacheData);

    int gpuAccCachedFd = -1;
    bool saveCachedNetwork = false;
    if (modelCacheHandle.size() > 0)
    {
        unsigned int index = 0;
        for (auto& backend : options.GetBackends())
        {
            // modelCacheHandle size should be equal to numberOfCachedModelFiles
            // modelCacheHandle vector should be in same order as backends
            auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
            if (numberOfCacheFiles > 0)
            {
                if (modelCacheHandle[index]->numFds != 1)
                {
                    ALOGW(
                       "ArmnnDriverImpl::prepareModelFromCache_1_3(): Cannot read from the model cache, numFds != 1.");
                    cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
                    return V1_3::ErrorStatus::GENERAL_FAILURE;
                }
                auto cachedFd = modelCacheHandle[index]->data[0];

                int modelCacheFileAccessMode = fcntl(cachedFd, F_GETFL) & O_ACCMODE;
                if (modelCacheFileAccessMode != O_RDWR)
                {
                    cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
                    return V1_3::ErrorStatus::GENERAL_FAILURE;
                }

                struct stat statBuffer;
                if (cachedFd != -1 && fstat(cachedFd, &statBuffer) == 0)
                {
                    long modelDataSize = statBuffer.st_size;
                    if (modelDataSize <= 0)
                    {
                        ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Wrong cached model size!");
                        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
                        return V1_3::ErrorStatus::NONE;
                    }
                    std::vector<uint8_t> modelData(modelDataSize);
                    pread(cachedFd, modelData.data(), modelData.size(), 0);
                    hashValue ^= CacheDataHandlerInstance().Hash(modelData);

                    // For GpuAcc numberOfCachedFiles is 1
                    if (backend == armnn::Compute::GpuAcc)
                    {
                        gpuAccCachedFd = cachedFd;
                    }
                }
                index += numberOfCacheFiles;
            }
        }
    }

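    // The combined hash of the data cache and model cache contents must match the value that was
    // registered for this token in prepareArmnnModel_1_3; otherwise the cache is stale or corrupt.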
    if (!CacheDataHandlerInstance().Validate(token, hashValue, dataCacheData.size()))
    {
        ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: ValidateHash() failed!");
        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
        return V1_3::ErrorStatus::GENERAL_FAILURE;
    }

    // Deserialize the network.
    armnn::INetworkPtr network = armnn::INetworkPtr(nullptr, [](armnn::INetwork*){});
    try
    {
        network = armnnDeserializer::IDeserializer::Create()->CreateNetworkFromBinary(dataCacheData);
    }
    catch (std::exception&)
    {
        ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Exception caught from Deserializer!");
        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
        return V1_3::ErrorStatus::GENERAL_FAILURE;
    }

    // Optimize the network
    armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
    armnn::OptimizerOptionsOpaque OptOptions;
    OptOptions.SetReduceFp32ToFp16(options.GetFp16Enabled());
    OptOptions.SetProfilingEnabled(options.IsGpuProfilingEnabled());

    armnn::BackendOptions gpuAcc("GpuAcc",
                                 {
                                         {"FastMathEnabled",       options.IsFastMathEnabled()},
                                         {"SaveCachedNetwork",     saveCachedNetwork},
                                         {"CachedNetworkFilePath", options.GetCachedNetworkFilePath()},
                                         {"MLGOTuningFilePath",    options.GetClMLGOTunedParametersFile()},
                                         {"CachedFileDescriptor",  gpuAccCachedFd}
                                 });

    armnn::BackendOptions cpuAcc("CpuAcc",
                                 {
                                         {"FastMathEnabled", options.IsFastMathEnabled()},
                                         {"NumberOfThreads", options.GetNumberOfThreads()}
                                 });
    OptOptions.AddModelOption(gpuAcc);
    OptOptions.AddModelOption(cpuAcc);

    std::vector<std::string> errMessages;
    try
    {
        optNet = armnn::Optimize(*network.get(),
                                 options.GetBackends(),
                                 runtime->GetDeviceSpec(),
                                 OptOptions,
                                 errMessages);
    }
    catch (std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from optimize.";
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Check that the optimized network is valid.
    if (!optNet)
    {
        std::stringstream message;
        message << "Invalid optimized network";
        for (const std::string& msg : errMessages)
        {
            message << "\n" << msg;
        }
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Export the optimized network graph to a dot file if an output dump directory
    // has been specified in the driver's arguments.
    std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
                                                               options.GetRequestInputsAndOutputsDumpDir());

    // Load it into the runtime.
    armnn::NetworkId netId = 0;
    std::string msg;
    armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
                                                MemorySource::Undefined,
                                                MemorySource::Undefined,
                                                options.IsGpuProfilingEnabled());

    try
    {
        if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
        {
            return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, msg, cb);
        }
    }
    catch (std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from LoadNetwork.";
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_3::ErrorStatus::NONE;
    }

    std::unique_ptr<ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>> preparedModel(
            new ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>(netId,
                                                           runtime.get(),
                                                           options.GetRequestInputsAndOutputsDumpDir(),
                                                           options.IsGpuProfilingEnabled(),
                                                           V1_3::Priority::MEDIUM,
                                                           options.isAsyncModelExecutionEnabled(),
                                                           options.getNoOfArmnnThreads(),
                                                           options.isImportEnabled(),
                                                           options.isExportEnabled(),
                                                           true));

    NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());

    ALOGV("ArmnnDriverImpl::prepareModelFromCache timing = %lld µs",
          std::chrono::duration_cast<std::chrono::microseconds>
          (std::chrono::system_clock::now() - modelFromCacheTimepoint).count());

    return V1_3::ErrorStatus::NONE;
}

Return<void> ArmnnDriverImpl::getCapabilities_1_3(const armnn::IRuntimePtr& runtime,
                                                  V1_3::IDevice::getCapabilities_1_3_cb cb)
{
    ALOGV("hal_1_3::ArmnnDriverImpl::getCapabilities()");

    V1_3::Capabilities capabilities;

    float defaultValue = .1f;

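    // Each capability value is read from an Android system property (the g_* keys defined at the top of
    // this file); defaultValue is used as the fallback when a property is unset or cannot be parsed.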
    if (runtime)
    {
        capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime =
                ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue);

        capabilities.relaxedFloat32toFloat16PerformanceScalar.powerUsage =
                ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue);

        capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime =
                ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue);

        capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage =
                ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue);

        capabilities.ifPerformance.execTime =
                ParseSystemProperty(g_ifPerformanceExecTime, defaultValue);

        capabilities.ifPerformance.powerUsage =
                ParseSystemProperty(g_ifPerformancePowerUsage, defaultValue);

        capabilities.whilePerformance.execTime =
                ParseSystemProperty(g_whilePerformanceExecTime, defaultValue);

        capabilities.whilePerformance.powerUsage =
                ParseSystemProperty(g_whilePerformancePowerUsage, defaultValue);

        // Set the base value for all operand types
        capabilities.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_3>({FLT_MAX, FLT_MAX});

        // Load supported operand types
        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_FLOAT32,
                {
                    .execTime = ParseSystemProperty(g_OperandTypeTensorFloat32PerformanceExecTime, defaultValue),
                    .powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat32PerformancePowerUsage, defaultValue)
                });

        update(&capabilities.operandPerformance, V1_3::OperandType::FLOAT32,
                {
                    .execTime = ParseSystemProperty(g_OperandTypeFloat32PerformanceExecTime, defaultValue),
                    .powerUsage = ParseSystemProperty(g_OperandTypeFloat32PerformancePowerUsage, defaultValue)
                });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_FLOAT16,
                {
                    .execTime = ParseSystemProperty(g_OperandTypeTensorFloat16PerformanceExecTime, defaultValue),
                    .powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat16PerformancePowerUsage, defaultValue)
                });

        update(&capabilities.operandPerformance, V1_3::OperandType::FLOAT16,
                {
                    .execTime = ParseSystemProperty(g_OperandTypeFloat16PerformanceExecTime, defaultValue),
                    .powerUsage = ParseSystemProperty(g_OperandTypeFloat16PerformancePowerUsage, defaultValue)
                });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_ASYMM,
                {
                    .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformanceExecTime, defaultValue),
                    .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformancePowerUsage, defaultValue)
                });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_SYMM,
                {
                    .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformanceExecTime, defaultValue),
                    .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformancePowerUsage, defaultValue)
                });
        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_ASYMM_SIGNED,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmSignedPerformanceExecTime,
                   defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmSignedPerformancePowerUsage,
                   defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT16_SYMM,
                {
                    .execTime = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformanceExecTime, defaultValue),
                    .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformancePowerUsage, defaultValue)
                });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
               {
                   .execTime =
                   ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime, defaultValue),
                   .powerUsage =
                   ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_INT32,
                {
                    .execTime = ParseSystemProperty(g_OperandTypeTensorInt32PerformanceExecTime, defaultValue),
                    .powerUsage = ParseSystemProperty(g_OperandTypeTensorInt32PerformancePowerUsage, defaultValue)
                });

        update(&capabilities.operandPerformance, V1_3::OperandType::INT32,
                {
                    .execTime = ParseSystemProperty(g_OperandTypeInt32PerformanceExecTime, defaultValue),
                    .powerUsage = ParseSystemProperty(g_OperandTypeInt32PerformancePowerUsage, defaultValue)
                });

        cb(V1_3::ErrorStatus::NONE, capabilities);
    }
    else
    {
        capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime   = 0;
        capabilities.relaxedFloat32toFloat16PerformanceScalar.powerUsage = 0;
        capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime   = 0;
        capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage = 0;
        capabilities.ifPerformance.execTime      = 0;
        capabilities.ifPerformance.powerUsage    = 0;
        capabilities.whilePerformance.execTime   = 0;
        capabilities.whilePerformance.powerUsage = 0;

        // Set the base value for all operand types
        capabilities.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_3>({0.f, 0.0f});

        cb(V1_3::ErrorStatus::DEVICE_UNAVAILABLE, capabilities);
    }

    return Void();
}

} // namespace hal_1_3
} // namespace armnn_driver