xref: /aosp_15_r20/external/android-nn-driver/1.2/ArmnnDriverImpl.cpp (revision 3e777be0405cee09af5d5785ff37f7cfb5bee59a)
1 //
2 // Copyright © 2017, 2023 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "ArmnnDriverImpl.hpp"
7 #include "../ArmnnPreparedModel_1_2.hpp"
8 #include "../ModelToINetworkConverter.hpp"
9 #include "../SystemPropertiesUtils.hpp"
10 
11 #include <armnnDeserializer/IDeserializer.hpp>
12 
13 #include <log/log.h>
14 #include <sys/stat.h>
15 #include <chrono>
16 
17 namespace
18 {
19 
20 const char *g_RelaxedFloat32toFloat16PerformanceExecTime    = "ArmNN.relaxedFloat32toFloat16Performance.execTime";
21 const char *g_RelaxedFloat32toFloat16PerformancePowerUsage  = "ArmNN.relaxedFloat32toFloat16Performance.powerUsage";
22 
23 const char *g_OperandTypeTensorFloat32PerformanceExecTime   = "Armnn.operandTypeTensorFloat32Performance.execTime";
24 const char *g_OperandTypeTensorFloat32PerformancePowerUsage = "Armnn.operandTypeTensorFloat32Performance.powerUsage";
25 
26 const char *g_OperandTypeFloat32PerformanceExecTime         = "Armnn.operandTypeFloat32Performance.execTime";
27 const char *g_OperandTypeFloat32PerformancePowerUsage       = "Armnn.operandTypeFloat32Performance.powerUsage";
28 
29 const char *g_OperandTypeTensorFloat16PerformanceExecTime   = "Armnn.operandTypeTensorFloat16Performance.execTime";
30 const char *g_OperandTypeTensorFloat16PerformancePowerUsage = "Armnn.operandTypeTensorFloat16Performance.powerUsage";
31 
32 const char *g_OperandTypeFloat16PerformanceExecTime         = "Armnn.operandTypeFloat16Performance.execTime";
33 const char *g_OperandTypeFloat16PerformancePowerUsage       = "Armnn.operandTypeFloat16Performance.powerUsage";
34 
35 const char *g_OperandTypeTensorQuant8AsymmPerformanceExecTime =
36         "Armnn.operandTypeTensorQuant8AsymmPerformance.execTime";
37 const char *g_OperandTypeTensorQuant8AsymmPerformancePowerUsage =
38         "Armnn.operandTypeTensorQuant8AsymmPerformance.powerUsage";
39 
40 const char *g_OperandTypeTensorQuant16SymmPerformanceExecTime =
41         "Armnn.operandTypeTensorQuant16SymmPerformance.execTime";
42 const char *g_OperandTypeTensorQuant16SymmPerformancePowerUsage =
43         "Armnn.operandTypeTensorQuant16SymmPerformance.powerUsage";
44 
45 const char *g_OperandTypeTensorQuant8SymmPerformanceExecTime =
46         "Armnn.operandTypeTensorQuant8SymmPerformance.execTime";
47 const char *g_OperandTypeTensorQuant8SymmPerformancePowerUsage =
48         "Armnn.operandTypeTensorQuant8SymmPerformance.powerUsage";
49 
50 const char *g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime =
51     "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.execTime";
52 const char *g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage =
53     "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.powerUsage";
54 
55 
56 const char *g_OperandTypeTensorInt32PerformanceExecTime     = "Armnn.operandTypeTensorInt32Performance.execTime";
57 const char *g_OperandTypeTensorInt32PerformancePowerUsage   = "Armnn.operandTypeTensorInt32Performance.powerUsage";
58 
59 const char *g_OperandTypeInt32PerformanceExecTime           = "Armnn.operandTypeInt32Performance.execTime";
60 const char *g_OperandTypeInt32PerformancePowerUsage         = "Armnn.operandTypeInt32Performance.powerUsage";
61 
62 
NotifyCallbackAndCheck(const android::sp<V1_2::IPreparedModelCallback> & callback,V1_0::ErrorStatus errorStatus,const android::sp<V1_2::IPreparedModel> & preparedModelPtr)63 void NotifyCallbackAndCheck(const android::sp<V1_2::IPreparedModelCallback>& callback,
64                             V1_0::ErrorStatus errorStatus,
65                             const android::sp<V1_2::IPreparedModel>& preparedModelPtr)
66 {
67     Return<void> returned = callback->notify_1_2(errorStatus, preparedModelPtr);
68     // This check is required, if the callback fails and it isn't checked it will bring down the service
69     if (!returned.isOk())
70     {
71         ALOGE("ArmnnDriverImpl::prepareModel: hidl callback failed to return properly: %s ",
72               returned.description().c_str());
73     }
74 }
75 
// Report a model-preparation failure: log a warning, notify the callback with
// a null prepared model, and hand the error back for the caller to return as
// the HIDL transport status.
Return<V1_0::ErrorStatus> FailPrepareModel(V1_0::ErrorStatus error,
                                           const std::string& message,
                                           const android::sp<V1_2::IPreparedModelCallback>& callback)
{
    ALOGW("ArmnnDriverImpl::prepareModel: %s", message.c_str());
    NotifyCallbackAndCheck(callback, error, nullptr);
    return error;
}
84 
85 } // anonymous namespace
86 
87 namespace armnn_driver
88 {
89 namespace hal_1_2
90 {
91 
prepareArmnnModel_1_2(const armnn::IRuntimePtr & runtime,const armnn::IGpuAccTunedParametersPtr & clTunedParameters,const DriverOptions & options,const V1_2::Model & model,const android::hardware::hidl_vec<android::hardware::hidl_handle> & modelCacheHandle,const android::hardware::hidl_vec<android::hardware::hidl_handle> & dataCacheHandle,const HidlToken & token,const android::sp<V1_2::IPreparedModelCallback> & cb,bool float32ToFloat16)92 Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_2(
93        const armnn::IRuntimePtr& runtime,
94        const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
95        const DriverOptions& options,
96        const V1_2::Model& model,
97        const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
98        const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
99        const HidlToken& token,
100        const android::sp<V1_2::IPreparedModelCallback>& cb,
101        bool float32ToFloat16)
102 {
103     ALOGV("ArmnnDriverImpl::prepareArmnnModel_1_2()");
104 
105     std::chrono::time_point<std::chrono::system_clock> prepareModelTimepoint = std::chrono::system_clock::now();
106 
107     if (cb.get() == nullptr)
108     {
109         ALOGW("ArmnnDriverImpl::prepareModel: Invalid callback passed to prepareModel");
110         return V1_0::ErrorStatus::INVALID_ARGUMENT;
111     }
112 
113     if (!runtime)
114     {
115         return FailPrepareModel(V1_0::ErrorStatus::DEVICE_UNAVAILABLE, "Device unavailable", cb);
116     }
117 
118     if (!android::nn::validateModel(model))
119     {
120         return FailPrepareModel(V1_0::ErrorStatus::INVALID_ARGUMENT, "Invalid model passed as input", cb);
121     }
122 
123     // Deliberately ignore any unsupported operations requested by the options -
124     // at this point we're being asked to prepare a model that we've already declared support for
125     // and the operation indices may be different to those in getSupportedOperations anyway.
126     std::set<unsigned int> unsupportedOperations;
127     ModelToINetworkConverter<HalPolicy> modelConverter(options.GetBackends(),
128                                                        model,
129                                                        unsupportedOperations);
130 
131     if (modelConverter.GetConversionResult() != ConversionResult::Success)
132     {
133         FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "ModelToINetworkConverter failed", cb);
134         return V1_0::ErrorStatus::NONE;
135     }
136 
137     // Serialize the network graph to a .armnn file if an output directory
138     // has been specified in the drivers' arguments.
139     std::vector<uint8_t> dataCacheData;
140     bool serializeToFile = dataCacheHandle.size() < 1 ? false : true;
141     auto serializedNetworkFileName =
142         SerializeNetwork(*modelConverter.GetINetwork(),
143                          options.GetRequestInputsAndOutputsDumpDir(),
144                          dataCacheData,
145                          serializeToFile);
146 
147     // Optimize the network
148     armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
149     armnn::OptimizerOptionsOpaque OptOptions;
150     OptOptions.SetReduceFp32ToFp16(float32ToFloat16);
151     OptOptions.SetProfilingEnabled(options.IsGpuProfilingEnabled());
152 
153     int cachedFd = -1;
154     bool saveCachedNetwork = options.SaveCachedNetwork();
155 
156     unsigned int numberOfCachedModelFiles = 0;
157     if (modelCacheHandle.size() > 0)
158     {
159         unsigned int index = 0;
160         for (auto& backend : options.GetBackends())
161         {
162             // modelCacheHandle size should be equal to numberOfCachedModelFiles
163             // modelCacheHandle vector should be in same order as backends
164             auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
165             if (numberOfCacheFiles > 0)
166             {
167                 numberOfCachedModelFiles += numberOfCacheFiles;
168                 if (modelCacheHandle[index]->numFds == 1)
169                 {
170                     if (backend == armnn::Compute::GpuAcc)
171                     {
172                         cachedFd = modelCacheHandle[index]->data[0];
173                         saveCachedNetwork = true;
174                     }
175                 }
176                 index += numberOfCachedModelFiles;
177             }
178         }
179     }
180 
181     armnn::BackendOptions gpuAcc("GpuAcc",
182     {
183         { "FastMathEnabled", options.IsFastMathEnabled() },
184         { "SaveCachedNetwork", saveCachedNetwork },
185         { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
186         { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
187         { "CachedFileDescriptor", cachedFd }
188     });
189 
190     armnn::BackendOptions cpuAcc("CpuAcc",
191     {
192         { "FastMathEnabled", options.IsFastMathEnabled() },
193         { "NumberOfThreads", options.GetNumberOfThreads() }
194     });
195     OptOptions.AddModelOption(gpuAcc);
196     OptOptions.AddModelOption(cpuAcc);
197 
198     std::vector<std::string> errMessages;
199     try
200     {
201         optNet = armnn::Optimize(*modelConverter.GetINetwork(),
202                                  options.GetBackends(),
203                                  runtime->GetDeviceSpec(),
204                                  OptOptions,
205                                  errMessages);
206     }
207     catch (std::exception &e)
208     {
209         std::stringstream message;
210         message << "Exception (" << e.what() << ") caught from optimize.";
211         FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
212         return V1_0::ErrorStatus::NONE;
213     }
214 
215     // Check that the optimized network is valid.
216     if (!optNet)
217     {
218         std::stringstream message;
219         message << "Invalid optimized network";
220         for (const std::string& msg : errMessages)
221         {
222             message << "\n" << msg;
223         }
224         FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
225         return V1_0::ErrorStatus::NONE;
226     }
227 
228     // Export the optimized network graph to a dot file if an output dump directory
229     // has been specified in the drivers' arguments.
230     std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
231                                                                options.GetRequestInputsAndOutputsDumpDir());
232 
233     // Load it into the runtime.
234     armnn::NetworkId netId = 0;
235     std::string msg;
236     armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
237                                                 MemorySource::Undefined,
238                                                 MemorySource::Undefined,
239                                                 options.IsGpuProfilingEnabled());
240 
241     auto numInputs  = getMainModel(model).inputIndexes.size();
242     auto numOutputs = getMainModel(model).outputIndexes.size();
243     try
244     {
245         if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
246         {
247             return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, msg, cb);
248         }
249     }
250     catch (std::exception& e)
251     {
252         std::stringstream message;
253         message << "Exception (" << e.what()<< ") caught from LoadNetwork.";
254         FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
255         return V1_0::ErrorStatus::NONE;
256     }
257 
258     // Now that we have a networkId for the graph rename the exported files to use it
259     // so that we can associate the graph file and the input/output tensor exported files
260     RenameExportedFiles(serializedNetworkFileName,
261                         dotGraphFileName,
262                         options.GetRequestInputsAndOutputsDumpDir(),
263                         netId);
264 
265     std::unique_ptr<ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>> preparedModel(
266             new ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>(
267                     netId,
268                     runtime.get(),
269                     model,
270                     options.GetRequestInputsAndOutputsDumpDir(),
271                     options.IsGpuProfilingEnabled(),
272                     options.isAsyncModelExecutionEnabled(),
273                     options.getNoOfArmnnThreads(),
274                     options.isImportEnabled(),
275                     options.isExportEnabled()));
276 
277     // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
278     // this is enabled) before the first 'real' inference which removes the overhead of the first inference.
279     // Only run this if the GpuAcc backend has been added to options
280     if (std::find(options.GetBackends().begin(),
281                   options.GetBackends().end(),
282                   armnn::Compute::GpuAcc) != options.GetBackends().end())
283     {
284         if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs))
285         {
286             return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
287         }
288 
289         if (clTunedParameters &&
290             options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
291         {
292             // Now that we've done one inference the CL kernel parameters will have been tuned,
293             // so save the updated file.
294             try
295             {
296                 clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
297             }
298             catch (std::exception& error)
299             {
300                 ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
301                       options.GetClTunedParametersFile().c_str(), error.what());
302             }
303         }
304     }
305 
306     size_t hashValue = 0;
307     // Cache the model
308     if (dataCacheHandle.size() > 0)
309     {
310         // Cache the Arm NN model, should be only 1
311         if (dataCacheHandle.size() != 1)
312         {
313             NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
314             return V1_0::ErrorStatus::NONE;
315         }
316 
317         if (dataCacheHandle[0]->numFds != 1)
318         {
319             ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, numFds != 1.");
320             NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
321             return V1_0::ErrorStatus::NONE;
322         }
323 
324         if (dataCacheHandle[0]->data[0] < 0)
325         {
326             ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, fd < 0");
327             NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
328             return V1_0::ErrorStatus::NONE;
329         }
330 
331         int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE;
332         if (dataCacheFileAccessMode != O_RDWR)
333         {
334             ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_2(): Invalid Access Mode.");
335             NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
336             return V1_0::ErrorStatus::NONE;
337         }
338 
339         write(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size());
340         hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
341     }
342 
343     if (modelCacheHandle.size() > 0)
344     {
345         if (modelCacheHandle.size() != numberOfCachedModelFiles)
346         {
347             NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
348             return V1_0::ErrorStatus::NONE;
349         }
350         for (uint32_t i = 0; i < modelCacheHandle.size(); ++i)
351         {
352             if (modelCacheHandle[i]->numFds == 1)
353             {
354                 int modelCacheFileAccessMode = fcntl(modelCacheHandle[i]->data[0], F_GETFL) & O_ACCMODE;
355                 if (modelCacheFileAccessMode != O_RDONLY)
356                 {
357                     struct stat statBuffer;
358                     if (fstat(modelCacheHandle[i]->data[0], &statBuffer) == 0)
359                     {
360                         long modelDataSize = statBuffer.st_size;
361                         if (modelDataSize > 0)
362                         {
363                             std::vector <uint8_t> modelData(modelDataSize);
364                             pread(modelCacheHandle[i]->data[0], modelData.data(), modelData.size(), 0);
365                             hashValue ^= CacheDataHandlerInstance().Hash(modelData);
366                         }
367                     }
368                 }
369             }
370         }
371     }
372     if (hashValue != 0)
373     {
374         CacheDataHandlerInstance().Register(token, hashValue, dataCacheData.size());
375     }
376 
377     NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
378 
379     ALOGV("ArmnnDriverImpl::prepareModel cache timing = %lld µs", std::chrono::duration_cast<std::chrono::microseconds>
380          (std::chrono::system_clock::now() - prepareModelTimepoint).count());
381 
382     return V1_0::ErrorStatus::NONE;
383 }
384 
// Rebuild a prepared model from the NNAPI compilation cache: validate the
// token and cache handles, read the serialized Arm NN graph from the data
// cache, check its hash against the value registered at prepare time,
// deserialize, re-optimize and load it, then deliver the prepared model via
// the callback.
//
// Unlike prepareArmnnModel_1_2(), most validation failures here both notify
// the callback AND return GENERAL_FAILURE, because with a bad cache the
// caller should fall back to a full prepareModel().
Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareModelFromCache(
    const armnn::IRuntimePtr& runtime,
    const DriverOptions& options,
    const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
    const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
    const HidlToken& token,
    const android::sp<V1_2::IPreparedModelCallback>& cb,
    bool float32ToFloat16)
{
    ALOGV("ArmnnDriverImpl::prepareModelFromCache()");
    std::chrono::time_point<std::chrono::system_clock> modelFromCacheTimepoint = std::chrono::system_clock::now();

    if (cb.get() == nullptr)
    {
        ALOGW("ArmnnDriverImpl::prepareModelFromCache: Invalid callback passed to prepareModel");
        return V1_0::ErrorStatus::INVALID_ARGUMENT;
    }

    if (!runtime)
    {
        return FailPrepareModel(V1_0::ErrorStatus::DEVICE_UNAVAILABLE, "Device unavailable", cb);
    }

    if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN)
    {
        FailPrepareModel(V1_0::ErrorStatus::INVALID_ARGUMENT, "Invalid token passed!", cb);
        return V1_0::ErrorStatus::INVALID_ARGUMENT;
    }

    // DataCacheHandle size should always be 1
    // Arm NN model
    if (dataCacheHandle.size() != 1)
    {
        FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "No data cache!", cb);
        return V1_0::ErrorStatus::GENERAL_FAILURE;
    }

    // Check if model files cached they match the expected value
    unsigned int numberOfCachedModelFiles = 0;
    for (auto& backend : options.GetBackends())
    {
        numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
    }
    if (modelCacheHandle.size() != numberOfCachedModelFiles)
    {
        FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid model cache!", cb);
        return V1_0::ErrorStatus::GENERAL_FAILURE;
    }

    if (dataCacheHandle[0]->numFds != 1)
    {
        ALOGW("ArmnnDriverImpl::prepareModelFromCache: Cannot read from the cache data, numFds != 1.");
        FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "No data cache!", cb);
        return V1_0::ErrorStatus::GENERAL_FAILURE;
    }

    if (dataCacheHandle[0]->data[0] < 0)
    {
        ALOGW("ArmnnDriverImpl::prepareModelFromCache: Cannot read from the cache data, fd < 0");
        FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "No data cache!", cb);
        return V1_0::ErrorStatus::GENERAL_FAILURE;
    }

    // The data cache fd must be read/write (NNAPI hands the driver RW handles).
    int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE;
    if (dataCacheFileAccessMode != O_RDWR)
    {
        FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid Access Mode!", cb);
        return V1_0::ErrorStatus::GENERAL_FAILURE;
    }

    // The expected size was registered by prepareArmnnModel_1_2() under this
    // token; 0 means we have never cached for this token.
    auto dataSize = CacheDataHandlerInstance().GetCacheSize(token);
    if (dataSize == 0)
    {
        ALOGW("ArmnnDriverImpl::prepareModelFromCache: Invalid data to deserialize!");
        FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid data to deserialize!", cb);
        return V1_0::ErrorStatus::GENERAL_FAILURE;
    }

    int offset = 0;
    {
        // Reject a cache file whose on-disk size does not match the registered
        // size (e.g. truncated by an interrupted write).
        struct stat statBuffer;
        if (fstat(dataCacheHandle[0]->data[0], &statBuffer) == 0)
        {
            unsigned long bufferSize = statBuffer.st_size;
            if (bufferSize != dataSize)
            {
                ALOGW("ArmnnDriverImpl::prepareModelFromCache: Invalid data to deserialize!");
                FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid data to deserialize!", cb);
                return V1_0::ErrorStatus::GENERAL_FAILURE;
            }
        }
    }
    std::vector<uint8_t> dataCacheData(dataSize);
    pread(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size(), offset);
    auto hashValue = CacheDataHandlerInstance().Hash(dataCacheData);

    int gpuAccCachedFd = -1;
    bool saveCachedNetwork = false;
    if (modelCacheHandle.size() > 0)
    {
        unsigned int index = 0;
        for (auto& backend : options.GetBackends())
        {
            // modelCacheHandle size should be equal to numberOfCachedModelFiles
            // modelCacheHandle vector should be in same order as backends
            auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
            if (numberOfCacheFiles > 0)
            {
                if (modelCacheHandle[index]->numFds != 1)
                {
                    ALOGW("ArmnnDriverImpl::prepareModelFromCache: Cannot read from the model cache, numFds != 1.");
                    FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE,
                                     "Cannot read from the model cache, numFds != 1.", cb);
                    return V1_0::ErrorStatus::GENERAL_FAILURE;
                }
                auto cachedFd = modelCacheHandle[index]->data[0];

                int modelCacheFileAccessMode = fcntl(cachedFd, F_GETFL) & O_ACCMODE;
                if (modelCacheFileAccessMode != O_RDWR)
                {
                    FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid Access Mode!", cb);
                    return V1_0::ErrorStatus::GENERAL_FAILURE;
                }

                struct stat statBuffer;
                if (cachedFd != -1 && fstat(cachedFd, &statBuffer) == 0)
                {
                    long modelDataSize = statBuffer.st_size;
                    if (modelDataSize <= 0)
                    {
                        // NOTE(review): this path returns NONE after failing,
                        // unlike the GENERAL_FAILURE returns above — presumably
                        // an oversight; confirm against the HAL contract before
                        // changing, as callers key off the transport status.
                        FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Wrong cached model size!", cb);
                        return V1_0::ErrorStatus::NONE;
                    }
                    // XOR each backend cache file's hash into the combined
                    // value; must mirror the hashing in prepareArmnnModel_1_2().
                    std::vector<uint8_t> modelData(modelDataSize);
                    pread(cachedFd, modelData.data(), modelData.size(), 0);
                    hashValue ^= CacheDataHandlerInstance().Hash(modelData);

                    // For GpuAcc numberOfCachedFiles is 1
                    if (backend == armnn::Compute::GpuAcc)
                    {
                        gpuAccCachedFd = cachedFd;
                    }
                }
                index += numberOfCacheFiles;
            }
        }
    }

    // Combined hash must match what was registered at prepare time, otherwise
    // the cached artifacts have changed and cannot be trusted.
    if (!CacheDataHandlerInstance().Validate(token, hashValue, dataCacheData.size()))
    {
        ALOGW("ArmnnDriverImpl::prepareModelFromCache: ValidateHash() failed!");
        FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "ValidateHash Failed!", cb);
        return V1_0::ErrorStatus::GENERAL_FAILURE;
    }

    // Deserialize the network..
    armnn::INetworkPtr network = armnn::INetworkPtr(nullptr, [](armnn::INetwork*){});
    try
    {
        network = armnnDeserializer::IDeserializer::Create()->CreateNetworkFromBinary(dataCacheData);
    }
    catch (std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from Deserializer.";
        FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_0::ErrorStatus::GENERAL_FAILURE;
    }

    // Optimize the network
    armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
    armnn::OptimizerOptionsOpaque OptOptions;
    OptOptions.SetReduceFp32ToFp16(float32ToFloat16);
    OptOptions.SetProfilingEnabled(options.IsGpuProfilingEnabled());

    // saveCachedNetwork stays false here: we are consuming the cache, so the
    // GpuAcc backend only reads from gpuAccCachedFd.
    armnn::BackendOptions gpuAcc("GpuAcc",
                                 {
                                         {"FastMathEnabled",       options.IsFastMathEnabled()},
                                         {"SaveCachedNetwork",     saveCachedNetwork},
                                         {"CachedNetworkFilePath", options.GetCachedNetworkFilePath()},
                                         {"MLGOTuningFilePath",    options.GetClMLGOTunedParametersFile()},
                                         {"CachedFileDescriptor",  gpuAccCachedFd}
                                 });

    armnn::BackendOptions cpuAcc("CpuAcc",
                                 {
                                         {"FastMathEnabled", options.IsFastMathEnabled()},
                                         {"NumberOfThreads", options.GetNumberOfThreads()}
                                 });
    OptOptions.AddModelOption(gpuAcc);
    OptOptions.AddModelOption(cpuAcc);

    std::vector<std::string> errMessages;
    try
    {
        optNet = armnn::Optimize(*network.get(),
                                 options.GetBackends(),
                                 runtime->GetDeviceSpec(),
                                 OptOptions,
                                 errMessages);
    }
    catch (std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from optimize.";
        FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_0::ErrorStatus::NONE;
    }

    // Check that the optimized network is valid.
    if (!optNet)
    {
        std::stringstream message;
        message << "Invalid optimized network";
        for (const std::string& msg : errMessages)
        {
            message << "\n" << msg;
        }
        FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_0::ErrorStatus::NONE;
    }

    // Export the optimized network graph to a dot file if an output dump directory
    // has been specified in the drivers' arguments.
    std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
                                                               options.GetRequestInputsAndOutputsDumpDir());

    // Load it into the runtime.
    armnn::NetworkId netId = 0;
    std::string msg;
    armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
                                                MemorySource::Undefined,
                                                MemorySource::Undefined,
                                                options.IsGpuProfilingEnabled());

    try
    {
        if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
        {
            return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, msg, cb);
        }
    }
    catch (std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from LoadNetwork.";
        FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_0::ErrorStatus::NONE;
    }

    // Note: this ArmnnPreparedModel_1_2 constructor overload (no V1_2::Model,
    // trailing 'true') marks the model as having been prepared from cache.
    std::unique_ptr<ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>> preparedModel(
            new ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>(
                    netId,
                    runtime.get(),
                    options.GetRequestInputsAndOutputsDumpDir(),
                    options.IsGpuProfilingEnabled(),
                    options.isAsyncModelExecutionEnabled(),
                    options.getNoOfArmnnThreads(),
                    options.isImportEnabled(),
                    options.isExportEnabled(),
                    true));

    NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());

    ALOGV("ArmnnDriverImpl::prepareModelFromCache cache timing = %lld µs",
          std::chrono::duration_cast<std::chrono::microseconds>
          (std::chrono::system_clock::now() - modelFromCacheTimepoint).count());

    return V1_0::ErrorStatus::NONE;
}
655 
getCapabilities_1_2(const armnn::IRuntimePtr & runtime,V1_2::IDevice::getCapabilities_1_2_cb cb)656 Return<void> ArmnnDriverImpl::getCapabilities_1_2(const armnn::IRuntimePtr& runtime,
657                                                   V1_2::IDevice::getCapabilities_1_2_cb cb)
658 {
659     ALOGV("hal_1_2::ArmnnDriverImpl::getCapabilities()");
660 
661     V1_2::Capabilities capabilities;
662 
663     float defaultValue = .1f;
664 
665     if (runtime)
666     {
667         capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime =
668                 ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue);
669 
670         capabilities.relaxedFloat32toFloat16PerformanceScalar.powerUsage =
671                 ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue);
672 
673         capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime =
674                 ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue);
675 
676         capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage =
677                 ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue);
678 
679         // Set the base value for all operand types
680         #if defined(ARMNN_ANDROID_R) || defined(ARMNN_ANDROID_S)
681         capabilities.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_2>({FLT_MAX, FLT_MAX});
682         #else
683         capabilities.operandPerformance = nonExtensionOperandPerformance({FLT_MAX, FLT_MAX});
684         #endif
685 
686         // Load supported operand types
687         update(&capabilities.operandPerformance, V1_2::OperandType::TENSOR_FLOAT32,
688                 {
689                     .execTime = ParseSystemProperty(g_OperandTypeTensorFloat32PerformanceExecTime, defaultValue),
690                     .powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat32PerformancePowerUsage, defaultValue)
691                 });
692 
693         update(&capabilities.operandPerformance, V1_2::OperandType::FLOAT32,
694                 {
695                     .execTime = ParseSystemProperty(g_OperandTypeFloat32PerformanceExecTime, defaultValue),
696                     .powerUsage = ParseSystemProperty(g_OperandTypeFloat32PerformancePowerUsage, defaultValue)
697                 });
698 
699         update(&capabilities.operandPerformance, V1_2::OperandType::TENSOR_FLOAT16,
700                 {
701                     .execTime = ParseSystemProperty(g_OperandTypeTensorFloat16PerformanceExecTime, defaultValue),
702                     .powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat16PerformancePowerUsage, defaultValue)
703                 });
704 
705         update(&capabilities.operandPerformance, V1_2::OperandType::FLOAT16,
706                 {
707                     .execTime = ParseSystemProperty(g_OperandTypeFloat16PerformanceExecTime, defaultValue),
708                     .powerUsage = ParseSystemProperty(g_OperandTypeFloat16PerformancePowerUsage, defaultValue)
709                 });
710 
711         update(&capabilities.operandPerformance, V1_2::OperandType::TENSOR_QUANT8_ASYMM,
712                 {
713                     .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformanceExecTime, defaultValue),
714                     .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformancePowerUsage, defaultValue)
715                 });
716 
717         update(&capabilities.operandPerformance, V1_2::OperandType::TENSOR_QUANT8_SYMM,
718                 {
719                     .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformanceExecTime, defaultValue),
720                     .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformancePowerUsage, defaultValue)
721                 });
722 
723         update(&capabilities.operandPerformance, V1_2::OperandType::TENSOR_QUANT16_SYMM,
724                 {
725                     .execTime = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformanceExecTime, defaultValue),
726                     .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformancePowerUsage, defaultValue)
727                 });
728 
729         update(&capabilities.operandPerformance, V1_2::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
730                {
731                    .execTime =
732                    ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime, defaultValue),
733                    .powerUsage =
734                    ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage, defaultValue)
735                });
736 
737         update(&capabilities.operandPerformance, V1_2::OperandType::TENSOR_INT32,
738                 {
739                     .execTime = ParseSystemProperty(g_OperandTypeTensorInt32PerformanceExecTime, defaultValue),
740                     .powerUsage = ParseSystemProperty(g_OperandTypeTensorInt32PerformancePowerUsage, defaultValue)
741                 });
742 
743         update(&capabilities.operandPerformance, V1_2::OperandType::INT32,
744                 {
745                     .execTime = ParseSystemProperty(g_OperandTypeInt32PerformanceExecTime, defaultValue),
746                     .powerUsage = ParseSystemProperty(g_OperandTypeInt32PerformancePowerUsage, defaultValue)
747                 });
748 
749         cb(V1_0::ErrorStatus::NONE, capabilities);
750     }
751     else
752     {
753         capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime   = 0;
754         capabilities.relaxedFloat32toFloat16PerformanceScalar.powerUsage = 0;
755         capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime   = 0;
756         capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage = 0;
757 
758         // Set the base value for all operand types
759         #if defined(ARMNN_ANDROID_R) || defined(ARMNN_ANDROID_S)
760         capabilities.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_2>({0.f, 0.0f});
761         #else
762         capabilities.operandPerformance = nonExtensionOperandPerformance({0.f, 0.0f});
763         #endif
764 
765         cb(V1_0::ErrorStatus::DEVICE_UNAVAILABLE, capabilities);
766     }
767 
768     return Void();
769 }
770 
771 } // namespace hal_1_2
772 } // namespace armnn_driver