//
// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once


#include <armnn/ArmNN.hpp>

#if !defined(ARMNN_DISABLE_THREADS)
#include <armnn/Threadpool.hpp>
#include <common/include/IgnoreUnused.hpp>
#endif

#include <armnn/Logging.hpp>
#include <armnn/utility/Timer.hpp>
#include <armnn/BackendRegistry.hpp>
#include <armnn/utility/Assert.hpp>
#include <armnn/utility/NumericCast.hpp>

#include <armnnUtils/TContainer.hpp>
#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp"

#include <common/include/ProfilingGuid.hpp>

#if defined(ARMNN_SERIALIZER)
#include "armnnDeserializer/IDeserializer.hpp"
#endif
#if defined(ARMNN_TF_LITE_PARSER)
#include <armnnTfLiteParser/ITfLiteParser.hpp>
#endif
#if defined(ARMNN_ONNX_PARSER)
#include <armnnOnnxParser/IOnnxParser.hpp>
#endif

#include <armnnUtils/Filesystem.hpp>
#include <HeapProfiling.hpp>
#include <TensorIOUtils.hpp>

#include "armnn/utility/StringUtils.hpp"
#include <cxxopts/cxxopts.hpp>
#include "CxxoptsUtils.hpp"
#include <fmt/format.h>
#include <mapbox/variant.hpp>

#include <algorithm>
#include <iterator>
#include <fstream>
#include <map>
#include <string>
#include <vector>
#include <type_traits>

namespace InferenceModelInternal
{
using BindingPointInfo = armnn::BindingPointInfo;

using QuantizationParams = std::pair<float,int32_t>;

struct Params
{
    std::string                     m_ModelPath;
    std::vector<std::string>        m_InputBindings;
    std::vector<armnn::TensorShape> m_InputShapes;
    std::vector<std::string>        m_OutputBindings;
    std::vector<armnn::BackendId>   m_ComputeDevices;
    std::string                     m_DynamicBackendsPath;
    size_t                          m_SubgraphId;
    bool                            m_AllowExpandedDims;
    bool                            m_IsModelBinary;
    bool                            m_VisualizePostOptimizationModel;
    bool                            m_EnableFp16TurboMode;
    bool                            m_EnableBf16TurboMode;
    bool                            m_PrintIntermediateLayers;
    bool                            m_PrintIntermediateLayersToFile;
    bool                            m_ParseUnsupported;
    bool                            m_InferOutputShape;
    bool                            m_EnableFastMath;
    bool                            m_SaveCachedNetwork;
    bool                            m_OutputDetailsToStdOut;
    bool                            m_OutputDetailsOnlyToStdOut;
    std::string                     m_CachedNetworkFilePath;
    unsigned int                    m_NumberOfThreads;
    std::string                     m_MLGOTuningFilePath;
    bool                            m_AsyncEnabled;
    size_t                          m_ThreadPoolSize;
    bool                            m_ImportInputsIfAligned;


    Params()
        : m_ComputeDevices{}
        , m_SubgraphId(0)
        , m_AllowExpandedDims(false)
        , m_IsModelBinary(true)
        , m_VisualizePostOptimizationModel(false)
        , m_EnableFp16TurboMode(false)
        , m_EnableBf16TurboMode(false)
        , m_PrintIntermediateLayers(false)
        , m_PrintIntermediateLayersToFile(false)
        , m_ParseUnsupported(false)
        , m_InferOutputShape(false)
        , m_EnableFastMath(false)
        , m_SaveCachedNetwork(false)
        , m_OutputDetailsToStdOut(false)
        , m_OutputDetailsOnlyToStdOut(false)
        , m_CachedNetworkFilePath("")
        , m_NumberOfThreads(0)
        , m_MLGOTuningFilePath("")
        , m_AsyncEnabled(false)
        , m_ThreadPoolSize(0)
        , m_ImportInputsIfAligned(false)
    {}
};

} // namespace InferenceModelInternal

template <typename IParser>
struct CreateNetworkImpl
{
public:
    using Params = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        std::map<std::string, armnn::TensorShape> inputShapes;
        if (!params.m_InputShapes.empty())
        {
            const size_t numInputShapes   = params.m_InputShapes.size();
            const size_t numInputBindings = params.m_InputBindings.size();
            if (numInputShapes < numInputBindings)
            {
                throw armnn::Exception(fmt::format(
                    "Not every input has its tensor shape specified: expected={0}, got={1}",
                    numInputBindings, numInputShapes));
            }

            for (size_t i = 0; i < numInputShapes; i++)
            {
                inputShapes[params.m_InputBindings[i]] = params.m_InputShapes[i];
            }
        }

        std::vector<std::string> requestedOutputs = params.m_OutputBindings;
        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            // Handle text and binary input differently by calling the corresponding parser function
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes, requestedOutputs) :
                parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes, requestedOutputs));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            inputBindings.push_back(parser->GetNetworkInputBindingInfo(inputLayerName));
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            outputBindings.push_back(parser->GetNetworkOutputBindingInfo(outputLayerName));
        }

        return network;
    }
};
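
// Parser-specific specializations of CreateNetworkImpl follow. The Arm NN deserializer,
// the TfLite parser and the ONNX parser each expose slightly different factory and
// binding-info APIs, so each gets its own specialization guarded by its build flag.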

#if defined(ARMNN_SERIALIZER)
template <>
struct CreateNetworkImpl<armnnDeserializer::IDeserializer>
{
public:
    using IParser          = armnnDeserializer::IDeserializer;
    using Params           = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        auto parser(IParser::Create());
        ARMNN_ASSERT(parser);

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");

            std::error_code errorCode;
            fs::path pathToFile(params.m_ModelPath);
            if (!fs::exists(pathToFile, errorCode))
            {
                throw armnn::FileNotFoundException(fmt::format("Cannot find the file ({0}) errorCode: {1} {2}",
                                                   params.m_ModelPath,
                                                   errorCode.message(),
                                                   CHECK_LOCATION().AsString()));
            }
            std::ifstream file(params.m_ModelPath, std::ios::binary);

            network = parser->CreateNetworkFromBinary(file);
        }

        unsigned int subgraphId = armnn::numeric_cast<unsigned int>(params.m_SubgraphId);

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            armnnDeserializer::BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(subgraphId, inputLayerName);
            inputBindings.push_back(std::make_pair(inputBinding.m_BindingId, inputBinding.m_TensorInfo));
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            armnnDeserializer::BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(subgraphId, outputLayerName);
            outputBindings.push_back(std::make_pair(outputBinding.m_BindingId, outputBinding.m_TensorInfo));
        }

        return network;
    }
};
#endif

#if defined(ARMNN_TF_LITE_PARSER)
template <>
struct CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>
{
public:
    using IParser = armnnTfLiteParser::ITfLiteParser;
    using Params = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        IParser::TfLiteParserOptions options;
        options.m_AllowExpandedDims          = params.m_AllowExpandedDims;
        options.m_StandInLayerForUnsupported = params.m_ParseUnsupported;
        options.m_InferAndValidate           = params.m_InferOutputShape;
        auto parser(IParser::Create(options));

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            armnn::BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(params.m_SubgraphId, inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            armnn::BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(params.m_SubgraphId, outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif

#if defined(ARMNN_ONNX_PARSER)
template <>
struct CreateNetworkImpl<armnnOnnxParser::IOnnxParser>
{
public:
    using IParser = armnnOnnxParser::IOnnxParser;
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<BindingPointInfo>& inputBindings,
                                     std::vector<BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        std::map<std::string, armnn::TensorShape> inputShapes;
        if (!params.m_InputShapes.empty())
        {
            const size_t numInputShapes   = params.m_InputShapes.size();
            const size_t numInputBindings = params.m_InputBindings.size();
            if (numInputShapes < numInputBindings)
            {
                throw armnn::Exception(fmt::format(
                    "Not every input has its tensor shape specified: expected={0}, got={1}",
                    numInputBindings, numInputShapes));
            }

            for (size_t i = 0; i < numInputShapes; i++)
            {
                inputShapes[params.m_InputBindings[i]] = params.m_InputShapes[i];
            }

            {
                ARMNN_SCOPED_HEAP_PROFILING("Parsing");
                network = (params.m_IsModelBinary ?
                    parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes) :
                    parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes));
            }
        }
        else
        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str()) :
                parser->CreateNetworkFromTextFile(modelPath.c_str()));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            BindingPointInfo inputBinding = parser->GetNetworkInputBindingInfo(inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            BindingPointInfo outputBinding = parser->GetNetworkOutputBindingInfo(outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif

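// Illustrative usage sketch (not part of this header's API surface). The model path and
// tensor names below are hypothetical; a real caller would take them from the command line.
//
//     using TfLiteModel = InferenceModel<armnnTfLiteParser::ITfLiteParser, float>;
//
//     TfLiteModel::Params params;
//     params.m_ModelPath      = "model.tflite";               // hypothetical file
//     params.m_InputBindings  = { "input" };                  // hypothetical tensor names
//     params.m_OutputBindings = { "output" };
//     params.m_ComputeDevices = { armnn::Compute::CpuRef };
//
//     TfLiteModel model(params, /*enableProfiling=*/false, /*dynamicBackendsPath=*/"");
//
//     std::vector<armnnUtils::TContainer> inputs  = { std::vector<float>(model.GetInputSize()) };
//     std::vector<armnnUtils::TContainer> outputs = { std::vector<float>(model.GetOutputSize()) };
//     auto inferenceTimeMs = model.Run(inputs, outputs);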
template <typename IParser, typename TDataType>
class InferenceModel
{
public:
    using DataType           = TDataType;
    using Params             = InferenceModelInternal::Params;
    using QuantizationParams = InferenceModelInternal::QuantizationParams;


    struct CommandLineOptions
    {
        std::string m_ModelDir;
        std::vector<std::string> m_ComputeDevices;
        std::string m_DynamicBackendsPath;
        bool m_VisualizePostOptimizationModel;
        bool m_EnableFp16TurboMode;
        bool m_EnableBf16TurboMode;
        std::string m_Labels;

        std::vector<armnn::BackendId> GetComputeDevicesAsBackendIds()
        {
            std::vector<armnn::BackendId> backendIds;
            std::copy(m_ComputeDevices.begin(), m_ComputeDevices.end(), std::back_inserter(backendIds));
            return backendIds;
        }
    };

    static void AddCommandLineOptions(cxxopts::Options& options,
                                      CommandLineOptions& cLineOptions, std::vector<std::string>& required)
    {
        const std::vector<std::string> defaultComputes = { "CpuAcc", "CpuRef" };

        const std::string backendsMessage = "Which device to run layers on by default. Possible choices: "
                                          + armnn::BackendRegistryInstance().GetBackendIdsAsString();

        options
            .allow_unrecognised_options()
            .add_options()
                ("m,model-dir", "Path to directory containing model files (.prototxt/.tflite)",
                 cxxopts::value<std::string>(cLineOptions.m_ModelDir))
                ("c,compute", backendsMessage.c_str(),
                 cxxopts::value<std::vector<std::string>>(cLineOptions.m_ComputeDevices)->default_value("CpuRef"))
                ("b,dynamic-backends-path",
                 "Path where to load any available dynamic backend from. "
                 "If left empty (the default), dynamic backends will not be used.",
                 cxxopts::value(cLineOptions.m_DynamicBackendsPath))
                ("l,labels",
                 "Text file containing one image filename - correct label pair per line, "
                 "used to test the accuracy of the network.", cxxopts::value<std::string>(cLineOptions.m_Labels))
                ("v,visualize-optimized-model",
                 "Produce a dot file useful for visualizing the graph post optimization. "
                 "The file will have the same name as the model with the .dot extension.",
                 cxxopts::value<bool>(cLineOptions.m_VisualizePostOptimizationModel)->default_value("false"))
                ("fp16-turbo-mode",
                 "If this option is enabled, FP32 layers, weights and biases will be converted "
                 "to FP16 where the backend supports it.",
                 cxxopts::value<bool>(cLineOptions.m_EnableFp16TurboMode)->default_value("false"))
                ("bf16-turbo-mode",
                 "If this option is enabled, FP32 layers, weights and biases will be converted "
                 "to BF16 where the backend supports it.",
                 cxxopts::value<bool>(cLineOptions.m_EnableBf16TurboMode)->default_value("false"));

        required.emplace_back("model-dir");
    }
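
    // Illustrative sketch of how AddCommandLineOptions might be wired into a tool's option
    // parsing; the program name and the ParserType alias below are hypothetical.
    //
    //     cxxopts::Options options("MyTool", "Runs a network through InferenceModel");
    //     InferenceModel<ParserType, float>::CommandLineOptions cliOptions;
    //     std::vector<std::string> required;
    //     InferenceModel<ParserType, float>::AddCommandLineOptions(options, cliOptions, required);
    //
    //     auto result = options.parse(argc, argv);
    //     // ... then verify that every option named in 'required' was supplied before continuing.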

    InferenceModel(const Params& params,
                   bool enableProfiling,
                   const std::string& dynamicBackendsPath,
                   const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
        : m_EnableProfiling(enableProfiling),
          m_ProfilingDetailsMethod(armnn::ProfilingDetailsMethod::Undefined),
          m_DynamicBackendsPath(dynamicBackendsPath),
          m_ImportInputsIfAligned(params.m_ImportInputsIfAligned)
    {
        if (runtime)
        {
            m_Runtime = runtime;
        }
        else
        {
            armnn::IRuntime::CreationOptions options;
            options.m_EnableGpuProfiling = m_EnableProfiling;
            options.m_DynamicBackendsPath = m_DynamicBackendsPath;
            m_Runtime = armnn::IRuntime::Create(options);
        }

        // Configure the profiler if the profiling details are opted for
        if (params.m_OutputDetailsOnlyToStdOut)
            m_ProfilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsOnly;
        else if (params.m_OutputDetailsToStdOut)
            m_ProfilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsWithEvents;

        std::string invalidBackends;
        if (!CheckRequestedBackendsAreValid(params.m_ComputeDevices, armnn::Optional<std::string&>(invalidBackends)))
        {
            throw armnn::Exception("Some backend IDs are invalid: " + invalidBackends);
        }

        armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};
        {
            const auto parsing_start_time = armnn::GetTimeNow();
            armnn::INetworkPtr network = CreateNetworkImpl<IParser>::Create(params, m_InputBindings, m_OutputBindings);

            ARMNN_LOG(info) << "Network parsing time: " << std::setprecision(2)
                            << std::fixed << armnn::GetTimeDuration(parsing_start_time).count() << " ms.";

            ARMNN_SCOPED_HEAP_PROFILING("Optimizing");

            armnn::OptimizerOptionsOpaque options;
            options.SetReduceFp32ToFp16(params.m_EnableFp16TurboMode);
            options.SetDebugEnabled(params.m_PrintIntermediateLayers);
            options.SetDebugToFileEnabled(params.m_PrintIntermediateLayersToFile);
            options.SetShapeInferenceMethod(params.m_InferOutputShape ?
                    armnn::ShapeInferenceMethod::InferAndValidate : armnn::ShapeInferenceMethod::ValidateOnly);
            options.SetProfilingEnabled(m_EnableProfiling);

            armnn::BackendOptions gpuAcc("GpuAcc",
            {
                { "FastMathEnabled", params.m_EnableFastMath },
                { "SaveCachedNetwork", params.m_SaveCachedNetwork },
                { "CachedNetworkFilePath", params.m_CachedNetworkFilePath },
                { "MLGOTuningFilePath", params.m_MLGOTuningFilePath }
            });

            armnn::BackendOptions cpuAcc("CpuAcc",
            {
                { "FastMathEnabled", params.m_EnableFastMath },
                { "NumberOfThreads", params.m_NumberOfThreads }
            });
            options.AddModelOption(gpuAcc);
            options.AddModelOption(cpuAcc);

            const auto optimization_start_time = armnn::GetTimeNow();
            optNet = armnn::Optimize(*network, params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);

            ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2)
                            << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms.";

            if (!optNet)
            {
                throw armnn::Exception("Optimize returned nullptr");
            }
        }

        if (params.m_VisualizePostOptimizationModel)
        {
            fs::path filename = params.m_ModelPath;
            filename.replace_extension("dot");
            std::fstream file(filename.c_str(), std::ios_base::out);
            optNet->SerializeToDot(file);
        }

        armnn::Status ret;
        {
            ARMNN_SCOPED_HEAP_PROFILING("LoadNetwork");

            const auto loading_start_time = armnn::GetTimeNow();
            armnn::INetworkProperties networkProperties(params.m_AsyncEnabled,
                                                        armnn::MemorySource::Undefined,
                                                        armnn::MemorySource::Undefined,
                                                        enableProfiling,
                                                        m_ProfilingDetailsMethod);
            std::string errorMessage;
            ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet), errorMessage, networkProperties);

            ARMNN_LOG(info) << "Network loading time: " << std::setprecision(2)
                            << std::fixed << armnn::GetTimeDuration(loading_start_time).count() << " ms.";
#if !defined(ARMNN_DISABLE_THREADS)
            if (params.m_AsyncEnabled && params.m_ThreadPoolSize > 0)
            {
                std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
                for (size_t i = 0; i < params.m_ThreadPoolSize; ++i)
                {
                    memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(m_NetworkIdentifier));
                }

                m_Threadpool = std::make_unique<armnn::Threadpool>(params.m_ThreadPoolSize,
                                                                   m_Runtime.get(),
                                                                   memHandles);
            }
#endif
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::LoadNetwork failed");
        }
    }

    void CheckInputIndexIsValid(unsigned int inputIndex) const
    {
        if (m_InputBindings.size() < inputIndex + 1)
        {
            throw armnn::Exception(fmt::format("Input index out of range: {}", inputIndex));
        }
    }

    void CheckOutputIndexIsValid(unsigned int outputIndex) const
    {
        if (m_OutputBindings.size() < outputIndex + 1)
        {
            throw armnn::Exception(fmt::format("Output index out of range: {}", outputIndex));
        }
    }

    unsigned int GetInputSize(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return m_InputBindings[inputIndex].second.GetNumElements();
    }

    unsigned int GetOutputSize(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex].second.GetNumElements();
    }

    std::chrono::duration<double, std::milli> Run(
            const std::vector<armnnUtils::TContainer>& inputContainers,
            std::vector<armnnUtils::TContainer>& outputContainers)
    {
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = i;
                    throw armnn::Exception(
                            fmt::format("Not enough data for output #{0}: expected "
                            "{1} elements, got {2}", outputIndex, expectedOutputDataSize, actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);

        // Start timer to record inference time in EnqueueWorkload (in milliseconds)
        const auto start_time = armnn::GetTimeNow();

        armnn::Status ret;
        if (m_ImportInputsIfAligned)
        {
            std::vector<armnn::ImportedInputId> importedInputIds = m_Runtime->ImportInputs(
                m_NetworkIdentifier, MakeInputTensors(inputContainers), armnn::MemorySource::Malloc);

            std::vector<armnn::ImportedOutputId> importedOutputIds = m_Runtime->ImportOutputs(
                m_NetworkIdentifier, MakeOutputTensors(outputContainers), armnn::MemorySource::Malloc);

            ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                             MakeInputTensors(inputContainers),
                                             MakeOutputTensors(outputContainers),
                                             importedInputIds,
                                             importedOutputIds);
        }
        else
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                             MakeInputTensors(inputContainers),
                                             MakeOutputTensors(outputContainers));
        }
        const auto duration = armnn::GetTimeDuration(start_time);

        // if profiling is enabled print out the results
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }
        else
        {
            return duration;
        }
    }

    std::tuple<unsigned int, std::chrono::duration<double, std::milli>> RunAsync(
        armnn::experimental::IWorkingMemHandle& workingMemHandleRef,
        const std::vector<armnnUtils::TContainer>& inputContainers,
        std::vector<armnnUtils::TContainer>& outputContainers,
        unsigned int inferenceID)
    {
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = i;
                    throw armnn::Exception(
                            fmt::format("Not enough data for output #{0}: expected "
                            "{1} elements, got {2}", outputIndex, expectedOutputDataSize, actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);

        // Start timer to record inference time in Execute (in milliseconds)
        const auto start_time = armnn::GetTimeNow();

        armnn::Status ret = m_Runtime->Execute(workingMemHandleRef,
                                               MakeInputTensors(inputContainers),
                                               MakeOutputTensors(outputContainers));

        const auto duration = armnn::GetTimeDuration(start_time);

        // if profiling is enabled print out the results
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception(
                fmt::format("IRuntime::Execute asynchronously failed for network #{0} on inference #{1}",
                            m_NetworkIdentifier, inferenceID));
        }
        else
        {
            return std::make_tuple(inferenceID, duration);
        }
    }
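
    // Illustrative sketch (hypothetical call site): the working-memory-handle overload above
    // is meant for asynchronous execution, where each thread owns its own handle.
    //
    //     auto memHandle = model.CreateWorkingMemHandle();
    //     auto [inferenceId, duration] = model.RunAsync(*memHandle, inputs, outputs, /*inferenceID=*/0);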

    void RunAsync(const std::vector<armnnUtils::TContainer>& inputContainers,
                  std::vector<armnnUtils::TContainer>& outputContainers,
                  std::shared_ptr<armnn::IAsyncExecutionCallback> cb)
    {
#if !defined(ARMNN_DISABLE_THREADS)
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = i;
                    throw armnn::Exception(
                            fmt::format("Not enough data for output #{0}: expected "
                            "{1} elements, got {2}", outputIndex, expectedOutputDataSize, actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);

        m_Threadpool->Schedule(m_NetworkIdentifier,
                               MakeInputTensors(inputContainers),
                               MakeOutputTensors(outputContainers),
                               armnn::QosExecPriority::Medium,
                               cb);

        // if profiling is enabled print out the results
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }
#endif
    }

    const armnn::BindingPointInfo& GetInputBindingInfo(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return m_InputBindings[inputIndex];
    }

    const std::vector<armnn::BindingPointInfo>& GetInputBindingInfos() const
    {
        return m_InputBindings;
    }

    const armnn::BindingPointInfo& GetOutputBindingInfo(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex];
    }

    const std::vector<armnn::BindingPointInfo>& GetOutputBindingInfos() const
    {
        return m_OutputBindings;
    }

    QuantizationParams GetQuantizationParams(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return std::make_pair(m_OutputBindings[outputIndex].second.GetQuantizationScale(),
                              m_OutputBindings[outputIndex].second.GetQuantizationOffset());
    }

    QuantizationParams GetInputQuantizationParams(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return std::make_pair(m_InputBindings[inputIndex].second.GetQuantizationScale(),
                              m_InputBindings[inputIndex].second.GetQuantizationOffset());
    }

    std::vector<QuantizationParams> GetAllQuantizationParams() const
    {
        std::vector<QuantizationParams> quantizationParams;
        for (unsigned int i = 0u; i < m_OutputBindings.size(); i++)
        {
            quantizationParams.push_back(GetQuantizationParams(i));
        }
        return quantizationParams;
    }

    std::unique_ptr<armnn::experimental::IWorkingMemHandle> CreateWorkingMemHandle()
    {
        return m_Runtime->CreateWorkingMemHandle(m_NetworkIdentifier);
    }

private:
    armnn::NetworkId m_NetworkIdentifier;
    std::shared_ptr<armnn::IRuntime> m_Runtime;
#if !defined(ARMNN_DISABLE_THREADS)
    std::unique_ptr<armnn::Threadpool> m_Threadpool;
#endif

    std::vector<armnn::BindingPointInfo> m_InputBindings;
    std::vector<armnn::BindingPointInfo> m_OutputBindings;
    bool m_EnableProfiling;
    armnn::ProfilingDetailsMethod m_ProfilingDetailsMethod;
    std::string m_DynamicBackendsPath;
    bool m_ImportInputsIfAligned;

    template<typename TContainer>
    armnn::InputTensors MakeInputTensors(const std::vector<TContainer>& inputDataContainers)
    {
        return armnnUtils::MakeInputTensors(m_InputBindings, inputDataContainers);
    }

    template<typename TContainer>
    armnn::OutputTensors MakeOutputTensors(std::vector<TContainer>& outputDataContainers)
    {
        return armnnUtils::MakeOutputTensors(m_OutputBindings, outputDataContainers);
    }
};
804