//
// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//


#include "ArmNNExecutor.hpp"
#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp"

#include <armnn/IAsyncExecutionCallback.hpp>
#include <AsyncExecutionCallback.hpp>


using namespace armnn;
using namespace std::chrono;

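// Builds the executor end-to-end: creates/gets the shared Arm NN runtime, parses the model,
// optimizes it, loads it into the runtime and prepares the input/output tensors.
// On LoadNetwork failure the instance is flagged via m_constructionFailed instead of throwing.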
ArmNNExecutor::ArmNNExecutor(const ExecuteNetworkParams& params, armnn::IRuntime::CreationOptions runtimeOptions)
: m_Params(params)
{
    runtimeOptions.m_EnableGpuProfiling = params.m_EnableProfiling;
    runtimeOptions.m_DynamicBackendsPath = params.m_DynamicBackendsPath;

    // Create/Get the static ArmNN Runtime. Note that the m_Runtime will be shared by all ArmNNExecutor
    // instances so the RuntimeOptions cannot be altered for different ArmNNExecutor instances.
    m_Runtime = GetRuntime(runtimeOptions);

    auto parser = CreateParser();
    auto network = parser->CreateNetwork(m_Params);
    auto optNet = OptimizeNetwork(network.get());

    m_IOInfo = GetIOInfo(optNet.get());

    armnn::ProfilingDetailsMethod profilingDetailsMethod = ProfilingDetailsMethod::Undefined;
    if (params.m_OutputDetailsOnlyToStdOut)
    {
        profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsOnly;
    }
    else if (params.m_OutputDetailsToStdOut)
    {
        profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsWithEvents;
    }

    INetworkProperties networkProperties{m_Params.m_Concurrent,
                                         MemorySource::Undefined,
                                         MemorySource::Undefined,
                                         params.m_EnableProfiling,
                                         profilingDetailsMethod};

    std::string errorMsg;
    Status status = m_Runtime->LoadNetwork(m_NetworkId, std::move(optNet), errorMsg, networkProperties);
    if (status != Status::Success)
    {
        std::string message("Failed to create Arm NN Executor: ");
        message.append(errorMsg);
        // Throwing an exception at this point in the constructor causes lots of problems. We'll instead mark this
        // executor as not constructed.
        ARMNN_LOG(fatal) << message;
        m_constructionFailed = true;
        return;
    }

    SetupInputsAndOutputs();

    if (m_Params.m_Iterations > 1)
    {
        std::stringstream msg;
        msg << "Network will be executed " << m_Params.m_Iterations;
        if (m_Params.m_Concurrent)
        {
            msg << " times in an asynchronous manner. ";
        }
        else
        {
            msg << " times successively. ";
        }
        msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to "
               "cover each execution.";
        ARMNN_LOG(info) << msg.str();
    }

    if (m_Params.m_GenerateTensorData)
    {
        ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
    }

    if (m_Params.m_DontPrintOutputs)
    {
        ARMNN_LOG(info) << "Printing outputs to console is disabled.";
    }
}

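// Runs m_Params.m_Iterations inferences through the Arm NN Threadpool (one working memory
// handle per thread), then collects the notified callbacks to report per-inference and
// overall timings. Compiled out when ARMNN_DISABLE_THREADS is defined.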
void ArmNNExecutor::ExecuteAsync()
{
#if !defined(ARMNN_DISABLE_THREADS)
    std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
    std::unique_ptr<armnn::Threadpool> threadpool;
    armnn::AsyncCallbackManager callbackManager;
    std::unordered_map<armnn::InferenceId, const armnn::OutputTensors*> inferenceOutputMap;

    for (size_t i = 0; i < m_Params.m_ThreadPoolSize; ++i)
    {
        memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(m_NetworkId));
    }

    threadpool = std::make_unique<armnn::Threadpool>(m_Params.m_ThreadPoolSize,
                                                     m_Runtime,
                                                     memHandles);

    ARMNN_LOG(info) << "Asynchronous Execution with Arm NN thread pool...  \n";
    // Declare the latest and earliest inference times here to be used when calculating overall time
    std::chrono::high_resolution_clock::time_point earliestStartTime =
            std::chrono::high_resolution_clock::time_point::max();
    std::chrono::high_resolution_clock::time_point latestEndTime =
            std::chrono::high_resolution_clock::now();

    // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the
    // LoadedNetwork with each scheduled inference having a specific priority
    for (size_t i = 0; i < m_Params.m_Iterations; ++i)
    {
        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);

        std::shared_ptr<armnn::AsyncExecutionCallback> cb = callbackManager.GetNewCallback();
        inferenceOutputMap.insert({cb->GetInferenceId(), &m_OutputTensorsVec[i]});
        threadpool->Schedule(m_NetworkId,
                             m_InputTensorsVec[i],
                             m_OutputTensorsVec[i],
                             armnn::QosExecPriority::Medium,
                             cb);
    }

    // Check the results
    for (size_t iteration = 0; iteration < m_Params.m_Iterations; ++iteration)
    {
        auto cb = callbackManager.GetNotifiedCallback();

        // Get the results
        if (earliestStartTime > cb->GetStartTime())
        {
            earliestStartTime = cb->GetStartTime();
        }
        if (latestEndTime < cb->GetEndTime())
        {
            latestEndTime = cb->GetEndTime();
        }

        auto startTime = time_point_cast<std::chrono::milliseconds>(cb->GetStartTime());
        auto endTime = time_point_cast<std::chrono::milliseconds>(cb->GetEndTime());
        auto inferenceDuration = endTime - startTime;
        CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime);
        if (!m_Params.m_DontPrintOutputs)
        {
            const armnn::OutputTensors* out = inferenceOutputMap[cb->GetInferenceId()];
            PrintOutputTensors(out, iteration);
        }
    }

    // Print duration difference between overallStartTime and overallEndTime
    auto overallEndTime = time_point_cast<std::chrono::milliseconds>(latestEndTime);
    auto overallStartTime = time_point_cast<std::chrono::milliseconds>(earliestStartTime);
    auto totalInferenceDuration = overallEndTime - overallStartTime;
    ARMNN_LOG(info) << "Overall Inference time: " << std::setprecision(2)
                    << std::fixed << totalInferenceDuration.count() << " ms\n";

#endif
}

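// Runs m_Params.m_Iterations inferences one after another via IRuntime::EnqueueWorkload,
// optionally using pre-imported inputs/outputs when ImportInputsIfAligned is set, and checks
// each inference time against the configured threshold.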
void ArmNNExecutor::ExecuteSync()
{
    for (size_t x = 0; x < m_Params.m_Iterations; x++)
    {
        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);

        const auto start_time = armnn::GetTimeNow();
        armnn::Status ret;
        if (m_Params.m_ImportInputsIfAligned)
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkId,
                                             m_InputTensorsVec[x],
                                             m_OutputTensorsVec[x],
                                             m_ImportedInputIds[x],
                                             m_ImportedOutputIds[x]);
        }
        else
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkId,
                                             m_InputTensorsVec[x],
                                             m_OutputTensorsVec[x]);
        }

        const auto inferenceDuration = armnn::GetTimeDuration(start_time);

        // If profiling is enabled print out the results
        if (profiler && profiler->IsProfilingEnabled() && x == (m_Params.m_Iterations - 1))
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }

        if (!m_Params.m_DontPrintOutputs)
        {
            PrintOutputTensors(&m_OutputTensorsVec[x], x);
        }

        // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
        CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime);
    }
}

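// Dispatches to synchronous or thread-pool execution depending on m_ThreadPoolSize and
// returns raw pointers to the output buffers so callers can post-process or compare them.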
std::vector<const void*> ArmNNExecutor::Execute()
{
    if (m_Params.m_ThreadPoolSize == 0)
    {
        ExecuteSync();
    }
    else
    {
        ExecuteAsync();
    }
    std::vector<const void*> results;
    for (auto& output : m_OutputStorage)
    {
        results.push_back(output.m_Mem);
    }

    return results;
}

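// Illustrative usage only (not part of this file): a minimal sketch of how this executor
// could be driven. The real ExecuteNetwork entry point may construct and use it differently.
//
//     ExecuteNetworkParams params;                      // populated from the command line
//     armnn::IRuntime::CreationOptions runtimeOptions;  // defaults
//     ArmNNExecutor executor(params, runtimeOptions);
//     executor.PrintNetworkInfo();
//     std::vector<const void*> results = executor.Execute();
//
// PrintNetworkInfo below dumps the name, shape, data type and quantization parameters of
// every input and output of the loaded network.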
void ArmNNExecutor::PrintNetworkInfo()
{
    const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
                                                 m_Params.m_InputNames :
                                                 m_IOInfo.m_InputNames;
    std::stringstream ss;
    ss << "===== Network Info =====\n";
    ss << "Inputs in order:\n";
    for (const auto& inputName : inputNames)
    {
        const auto inputInfo = m_IOInfo.m_InputInfoMap[inputName].second;
        ss << inputName << ", " << inputInfo.GetShape() << ", " << GetDataTypeName(inputInfo.GetDataType());
        if (inputInfo.IsQuantized())
        {
            ss << " Quantization Offset: " << inputInfo.GetQuantizationOffset();
            if (inputInfo.HasMultipleQuantizationScales())
            {
                ss << " Quantization scales: ";
                for (const auto scale: inputInfo.GetQuantizationScales())
                {
                    ss << scale << ", ";
                }
            }
            else
            {
                ss << " Quantization scale: " << inputInfo.GetQuantizationScale();
            }
        }
        ss << "\n";
    }

    ss << "Outputs in order:\n";
    for (const auto& outputName : m_IOInfo.m_OutputNames)
    {
        const auto outputInfo = m_IOInfo.m_OutputInfoMap[outputName].second;
        ss << outputName << ", " << outputInfo.GetShape() << ", " << GetDataTypeName(outputInfo.GetDataType());
        if (outputInfo.IsQuantized())
        {
            ss << " Quantization Offset: " << outputInfo.GetQuantizationOffset();
            if (outputInfo.HasMultipleQuantizationScales())
            {
                ss << " Quantization scales: ";
                for (const auto scale: outputInfo.GetQuantizationScales())
                {
                    ss << scale << ", ";
                }
            }
            else
            {
                ss << " Quantization scale: " << outputInfo.GetQuantizationScale();
            }
        }
        ss << "\n";
    }

    std::cout << ss.str() << std::endl;
}

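// Allocates input/output storage and binds it into InputTensors/OutputTensors for every
// iteration: validates that the number of data files matches the network's inputs/outputs,
// fills inputs from file or with generated data, optionally imports aligned buffers, and
// reuses the provided sets round-robin when there are more iterations than sets.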
void ArmNNExecutor::SetupInputsAndOutputs()
{
    const unsigned int noOfInputs = m_IOInfo.m_InputNames.size();

    if (m_Params.m_InputNames.size() != 0 && m_Params.m_InputNames.size() != noOfInputs)
    {
        LogAndThrow("Number of input names does not match number of inputs");
    }

    const unsigned int inputFilePaths = m_Params.m_InputTensorDataFilePaths.size();
    const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
                                                 m_Params.m_InputNames :
                                                 m_IOInfo.m_InputNames;
    unsigned int noInputSets = 1;

    if (inputFilePaths != 0)
    {
        if (inputFilePaths % noOfInputs != 0)
        {
            LogAndThrow("Number of input files: " + std::to_string(inputFilePaths) +
                        " not compatible with number of inputs: " + std::to_string(noOfInputs));
        }
        noInputSets = inputFilePaths / noOfInputs;
        if (noInputSets != 1 && m_Params.m_ReuseBuffers)
        {
            LogAndThrow("Specifying multiple sets of inputs not compatible with ReuseBuffers");
        }
    }

    const unsigned int noOfOutputs = m_IOInfo.m_OutputNames.size();
    const unsigned int outputFilePaths = m_Params.m_OutputTensorFiles.size();
    unsigned int noOutputSets = 1;

    if (outputFilePaths != 0)
    {
        if (outputFilePaths % noOfOutputs != 0)
        {
            LogAndThrow("Number of output files: " + std::to_string(outputFilePaths) +
                        ", not compatible with number of outputs: " + std::to_string(noOfOutputs));
        }
        noOutputSets = outputFilePaths / noOfOutputs;

        if (noOutputSets != 1 && m_Params.m_ReuseBuffers)
        {
            LogAndThrow("Specifying multiple sets of outputs not compatible with ReuseBuffers");
        }
    }

    if (m_Params.m_ThreadPoolSize != 0)
    {
        // The current implementation of the Threadpool does not allow binding of outputs to a thread
        // So to ensure no two threads write to the same output at the same time, no output can be reused
        noOutputSets = m_Params.m_Iterations;
    }

    if (m_Params.m_InputTensorDataFilePaths.size() > noOfInputs)
    {
        ARMNN_LOG(info) << "Given network has " << noOfInputs << " input/s. One input-tensor-data file is required "
                        << "for each input. The user provided "
                        << m_Params.m_InputTensorDataFilePaths.size()
                        << " input-tensor-data file/s which will be used to fill the input/s.\n";
    }

    unsigned int inputCount = 0;
    for (unsigned int inputSet = 0; inputSet < noInputSets; ++inputSet)
    {
        armnn::InputTensors inputTensors;
        for (const auto& inputName: inputNames)
        {
            armnn::BindingPointInfo bindingPointInfo;
            try
            {
                bindingPointInfo = m_IOInfo.m_InputInfoMap.at(inputName);
            }
            catch (const std::out_of_range& e)
            {
                LogAndThrow("Input with inputName: " + inputName + " not found.");
            }

            const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;
            auto newInfo = armnn::TensorInfo{tensorInfo.GetShape(), tensorInfo.GetDataType(),
                                             tensorInfo.GetQuantizationScale(),
                                             tensorInfo.GetQuantizationOffset(),
                                             true};

            m_InputStorage.emplace_back(IOStorage{tensorInfo.GetNumBytes()});

            const int bindingId = bindingPointInfo.first;
            inputTensors.emplace_back(bindingId, armnn::ConstTensor{newInfo, m_InputStorage.back().m_Mem});

            const armnn::Optional<std::string> dataFile = m_Params.m_GenerateTensorData ?
                                                          armnn::EmptyOptional() :
                                                          armnn::MakeOptional<std::string>(
                                                                  m_Params.m_InputTensorDataFilePaths.at(inputCount++));

            switch (tensorInfo.GetDataType())
            {
                case armnn::DataType::Float32:
                {
                    auto typedTensor = reinterpret_cast<float*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<float>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QSymmS16:
                {
                    auto typedTensor = reinterpret_cast<int16_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int16_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QSymmS8:
                case armnn::DataType::QAsymmS8:
                {
                    auto typedTensor = reinterpret_cast<int8_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QAsymmU8:
                {
                    auto typedTensor = reinterpret_cast<uint8_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<uint8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::Signed32:
                {
                    auto typedTensor = reinterpret_cast<int32_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int32_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                default:
                {
                    LogAndThrow("Unexpected DataType");
                }
            }

        }

        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedInputIds.push_back(
                m_Runtime->ImportInputs(m_NetworkId, inputTensors, armnn::MemorySource::Malloc));
        }
        m_InputTensorsVec.emplace_back(inputTensors);
    }

    for (unsigned int outputSet = 0; outputSet < noOutputSets; ++outputSet)
    {
        armnn::OutputTensors outputTensors;
        for (const auto& output: m_IOInfo.m_OutputInfoMap)
        {
            const armnn::BindingPointInfo& bindingPointInfo = output.second;
            const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;

            m_OutputStorage.emplace_back(tensorInfo.GetNumBytes());
            outputTensors.emplace_back(bindingPointInfo.first, armnn::Tensor{tensorInfo, m_OutputStorage.back().m_Mem});
        }
        m_OutputTensorsVec.emplace_back(outputTensors);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedOutputIds.push_back(
                    m_Runtime->ImportOutputs(m_NetworkId, m_OutputTensorsVec.back(), armnn::MemorySource::Malloc));
        }
    }

    // If iterations > noSets fill the remaining iterations repeating the given files
    // If iterations < noSets just ignore the extra files
    const unsigned int remainingInputSets = (m_Params.m_Iterations > noInputSets)
                                          ? m_Params.m_Iterations - noInputSets
                                          : 0;
    for (unsigned int i = 0; i < remainingInputSets; ++i)
    {
        m_InputTensorsVec.push_back(m_InputTensorsVec[i % noInputSets]);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedInputIds.push_back(m_ImportedInputIds[i % noInputSets]);
        }
    }

    const unsigned int remainingOutputSets = (m_Params.m_Iterations > noOutputSets)
                                           ? m_Params.m_Iterations - noOutputSets
                                           : 0;
    for (unsigned int i = 0; i < remainingOutputSets; ++i)
    {
        m_OutputTensorsVec.push_back(m_OutputTensorsVec[i % noOutputSets]);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedOutputIds.push_back(m_ImportedOutputIds[i % noOutputSets]);
        }
    }
}

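// Walks the optimized graph with an IStrategy visitor and records, for every Input and Output
// layer, its binding id and TensorInfo so the executor can look bindings up by name later.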
ArmNNExecutor::IOInfo ArmNNExecutor::GetIOInfo(armnn::IOptimizedNetwork* optNet)
{
    struct IOStrategy : armnn::IStrategy
    {
        void ExecuteStrategy(const armnn::IConnectableLayer* layer,
                             const armnn::BaseDescriptor& descriptor,
                             const std::vector<armnn::ConstTensor>& constants,
                             const char* name,
                             const armnn::LayerBindingId id = 0) override
        {
            armnn::IgnoreUnused(descriptor, constants, id);
            switch (layer->GetType())
            {
                case armnn::LayerType::Input:
                {
                    m_IOInfo.m_InputNames.emplace_back(name);
                    m_IOInfo.m_InputInfoMap[name] = {id, layer->GetOutputSlot(0).GetTensorInfo()};
                    break;
                }
                case armnn::LayerType::Output:
                {
                    m_IOInfo.m_OutputNames.emplace_back(name);
                    m_IOInfo.m_OutputInfoMap[name] = {id, layer->GetInputSlot(0).GetConnection()->GetTensorInfo()};
                    break;
                }
                default: {}
            }
        }
        IOInfo m_IOInfo;
    };

    IOStrategy ioStrategy;
    optNet->ExecuteStrategy(ioStrategy);

    return ioStrategy.m_IOInfo;
}

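// Translates the command-line options into OptimizerOptionsOpaque and backend-specific
// BackendOptions (GpuAcc/CpuAcc), runs armnn::Optimize for the requested compute devices,
// and optionally serializes the optimized graph to a .dot file for visualization.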
armnn::IOptimizedNetworkPtr ArmNNExecutor::OptimizeNetwork(armnn::INetwork* network)
{
    armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};

    armnn::OptimizerOptionsOpaque options;
    options.SetReduceFp32ToFp16(m_Params.m_EnableFp16TurboMode);
    options.SetDebugEnabled(m_Params.m_PrintIntermediate);
    options.SetDebugToFileEnabled(m_Params.m_PrintIntermediateOutputsToFile);
    options.SetShapeInferenceMethod(m_Params.m_InferOutputShape ?
                                    armnn::ShapeInferenceMethod::InferAndValidate :
                                    armnn::ShapeInferenceMethod::ValidateOnly);
    options.SetProfilingEnabled(m_Params.m_EnableProfiling);
    options.SetAllowExpandedDims(m_Params.m_AllowExpandedDims);

    armnn::BackendOptions gpuAcc("GpuAcc",
                                 {
                                         { "FastMathEnabled", m_Params.m_EnableFastMath },
                                         { "SaveCachedNetwork", m_Params.m_SaveCachedNetwork },
                                         { "CachedNetworkFilePath", m_Params.m_CachedNetworkFilePath },
                                         { "MLGOTuningFilePath", m_Params.m_MLGOTuningFilePath }
                                 });

    armnn::BackendOptions cpuAcc("CpuAcc",
                                 {
                                         { "FastMathEnabled", m_Params.m_EnableFastMath },
                                         { "NumberOfThreads", m_Params.m_NumberOfThreads }
                                 });
    options.AddModelOption(gpuAcc);
    options.AddModelOption(cpuAcc);
    // The shapeInferenceMethod and allowExpandedDims values have to be added to the model options
    // because these are what are passed to the OptimizeSubgraphViews method and are used to create
    // the new optimized INetwork that method uses
    armnn::BackendOptions allowExDimOpt("AllowExpandedDims",
                                        {
                                                { "AllowExpandedDims", m_Params.m_AllowExpandedDims }
                                        });
    options.AddModelOption(allowExDimOpt);
    armnn::BackendOptions shapeInferOpt("ShapeInferenceMethod",
                                        {
                                                { "InferAndValidate", m_Params.m_InferOutputShape }
                                        });
    options.AddModelOption(shapeInferOpt);

    const auto optimization_start_time = armnn::GetTimeNow();
    optNet = armnn::Optimize(*network, m_Params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);

    ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2)
                    << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms\n";

    if (!optNet)
    {
        LogAndThrow("Optimize returned nullptr");
    }

    // If -v, --visualize-optimized-model is enabled then construct a file name for the dot file.
    if (m_Params.m_EnableLayerDetails)
    {
        fs::path filename = m_Params.m_ModelPath;
        filename.replace_extension("dot");
        std::fstream file(filename.c_str(), std::ios_base::out);
        optNet->SerializeToDot(file);
    }

    return optNet;
}

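// Picks a parser from the model file extension (.armnn, .tflite or .onnx) and flags the model
// as binary unless the extension is .json. Each branch throws if the corresponding parser
// support was not compiled in; an unrecognised extension yields a null parser.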
std::unique_ptr<ArmNNExecutor::IParser> ArmNNExecutor::CreateParser()
{
    const fs::path modelFilename = m_Params.m_ModelPath;
    const std::string modelExtension = modelFilename.extension();

    m_Params.m_IsModelBinary = modelExtension != ".json";
    std::unique_ptr<IParser> parser = nullptr;
    // Forward to implementation based on the parser type
    if (modelExtension == ".armnn")
    {
#if defined(ARMNN_SERIALIZER)
        parser = std::make_unique<ArmNNDeserializer>();
#else
        LogAndThrow("Not built with serialization support.");
#endif
    }
    else if (modelExtension == ".tflite")
    {
#if defined(ARMNN_TF_LITE_PARSER)
        parser = std::make_unique<TfliteParser>(m_Params);
#else
        LogAndThrow("Not built with Tensorflow-Lite parser support.");
#endif
    }
    else if (modelExtension == ".onnx")
    {
#if defined(ARMNN_ONNX_PARSER)
        parser = std::make_unique<OnnxParser>();
#else
        LogAndThrow("Not built with Onnx parser support.");
#endif
    }

    return parser;
}

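// Prints each output tensor of one iteration to stdout and, when output files were supplied,
// writes it to the file selected for that iteration/output, looking the binding name up from
// the binding id recorded in m_IOInfo.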
void ArmNNExecutor::PrintOutputTensors(const armnn::OutputTensors* outputTensors,
                                       unsigned int iteration)
{
    auto findOutputName = [&](const armnn::LayerBindingId id)
    {
        for (auto it = m_IOInfo.m_OutputInfoMap.begin(); it != m_IOInfo.m_OutputInfoMap.end(); ++it)
        {
            if (id == it->second.first)
            {
                return it->first;
            }
        }
        return std::string{};
    };

    unsigned int outputIndex = 0;
    unsigned int numOutputs = outputTensors->size();
    for (const auto& output: *outputTensors)
    {
        const auto bindingName = findOutputName(output.first);
        // We've already checked that the number of output files either equals numOutputs, in which
        // case those files are overwritten on every iteration (only the result of the last
        // iteration is kept), or that there are enough output files for each output of each
        // iteration.
        size_t outputFileIndex = iteration * numOutputs + outputIndex;
        if (!m_Params.m_OutputTensorFiles.empty())
        {
            outputFileIndex = outputFileIndex % m_Params.m_OutputTensorFiles.size();
            ARMNN_LOG(info) << "Writing output: " << bindingName << " bindingId: '"
                            << output.first
                            << "' of iteration: " << iteration + 1 << " to file: '"
                            << m_Params.m_OutputTensorFiles[outputFileIndex] << "'";
        }

        const armnn::Optional<std::string> outputTensorFile = m_Params.m_OutputTensorFiles.empty() ?
                                                              armnn::EmptyOptional() :
                                                              armnn::MakeOptional<std::string>(
                                                                      m_Params.m_OutputTensorFiles[outputFileIndex]);

        OutputWriteInfo outputWriteInfo
        {
            outputTensorFile,
            bindingName,
            output.second,
            !m_Params.m_DontPrintOutputs
        };

        std::cout << bindingName << ": ";
        std::vector<float> values;
        switch (output.second.GetDataType())
        {
            case armnn::DataType::Float32:
            {
                PrintTensor<float>(outputWriteInfo, "%f ");
                break;
            }

            case armnn::DataType::Signed32:
            {
                PrintTensor<int>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::QSymmS8:
            case armnn::DataType::QAsymmS8:
            {
                PrintTensor<int8_t>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::QAsymmU8:
            {
                PrintTensor<uint8_t>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::Float16:
            case armnn::DataType::QSymmS16:
            case armnn::DataType::BFloat16:
            case armnn::DataType::Boolean:
            case armnn::DataType::Signed64:
            default:
            {
                LogAndThrow("Unexpected DataType");
            }
        }
        std::cout << "\n";
        ++outputIndex;
    }
}

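// Compares the outputs of this executor against a second set of output buffers (for example
// from a run on a different backend) and prints the byte-level RMSE for each output tensor.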
void ArmNNExecutor::CompareAndPrintResult(std::vector<const void*> otherOutput)
{
    unsigned int index = 0;
    std::string typeString;
    for (const auto& outputTensors: m_OutputTensorsVec)
    {
        for (const auto& outputTensor: outputTensors)
        {
            size_t size = outputTensor.second.GetNumBytes();
            double result = ComputeByteLevelRMSE(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
            std::cout << "Byte level root mean square error: " << result << "\n";
        }
    }
}
#if defined(ARMNN_SERIALIZER)
ArmNNExecutor::ArmNNDeserializer::ArmNNDeserializer() : m_Parser(armnnDeserializer::IDeserializer::Create()){}

armnn::INetworkPtr ArmNNExecutor::ArmNNDeserializer::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;

    std::ifstream file(modelPath, std::ios::binary);
    return m_Parser->CreateNetworkFromBinary(file);
}

armnn::BindingPointInfo
ArmNNExecutor::ArmNNDeserializer::GetInputBindingPointInfo(size_t, const std::string& inputName)
{
    armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkInputBindingInfo(0, inputName);
    return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo};
}

armnn::BindingPointInfo
ArmNNExecutor::ArmNNDeserializer::GetOutputBindingPointInfo(size_t, const std::string& outputName)
{
    armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkOutputBindingInfo(0, outputName);
    return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo};
}
#endif

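// Thin wrapper around armnnTfLiteParser::ITfLiteParser: forwards the parser options that
// ExecuteNetwork exposes (stand-in layers, shape inference, expanded dims) and the
// per-subgraph binding lookups.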
#if defined(ARMNN_TF_LITE_PARSER)
ArmNNExecutor::TfliteParser::TfliteParser(const ExecuteNetworkParams& params)
{
    armnnTfLiteParser::ITfLiteParser::TfLiteParserOptions options;
    options.m_StandInLayerForUnsupported = params.m_ParseUnsupported;
    options.m_InferAndValidate = params.m_InferOutputShape;
    options.m_AllowExpandedDims = params.m_AllowExpandedDims;

    m_Parser = armnnTfLiteParser::ITfLiteParser::Create(options);
}

armnn::INetworkPtr ArmNNExecutor::TfliteParser::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;
    return m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str());
}

armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetInputBindingPointInfo(size_t subgraphId,
                                                                              const std::string& inputName)
{
    return m_Parser->GetNetworkInputBindingInfo(subgraphId, inputName);
}

armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetOutputBindingPointInfo(size_t subgraphId,
                                                                               const std::string& outputName)
{
    return m_Parser->GetNetworkOutputBindingInfo(subgraphId, outputName);
}
#endif


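// Wrapper around armnnOnnxParser::IOnnxParser: handles text vs. binary ONNX models and
// applies any user-specified input tensor shapes before parsing.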
#if defined(ARMNN_ONNX_PARSER)
ArmNNExecutor::OnnxParser::OnnxParser() : m_Parser(armnnOnnxParser::IOnnxParser::Create()){}

armnn::INetworkPtr ArmNNExecutor::OnnxParser::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;
    m_Parser = armnnOnnxParser::IOnnxParser::Create();
    std::map<std::string, armnn::TensorShape> inputShapes;
    if (!params.m_InputTensorShapes.empty())
    {
        const size_t numInputShapes = params.m_InputTensorShapes.size();
        const size_t numInputBindings = params.m_InputNames.size();
        if (numInputShapes < numInputBindings)
        {
            throw armnn::Exception(
                    fmt::format("Not every input has its tensor shape specified: expected={0}, got={1}",
                                numInputBindings, numInputShapes));
        }

        for (size_t i = 0; i < numInputShapes; i++)
        {
            inputShapes[params.m_InputNames[i]] = params.m_InputTensorShapes[i];
        }

        return params.m_IsModelBinary ?
               m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes) :
               m_Parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes);
    }

    // Handle text and binary input differently by calling the corresponding parser function
    return params.m_IsModelBinary ?
           m_Parser->CreateNetworkFromBinaryFile(params.m_ModelPath.c_str()) :
           m_Parser->CreateNetworkFromTextFile(params.m_ModelPath.c_str());
}

armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetInputBindingPointInfo(size_t, const std::string& inputName)
{
    return m_Parser->GetNetworkInputBindingInfo(inputName);
}

armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetOutputBindingPointInfo(size_t, const std::string& outputName)
{
    return m_Parser->GetNetworkOutputBindingInfo(outputName);
}
#endif