//
// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include <armnn/ArmNN.hpp>

#if !defined(ARMNN_DISABLE_THREADS)
#include <armnn/Threadpool.hpp>
#include <common/include/IgnoreUnused.hpp>
#endif

#include <armnn/Logging.hpp>
#include <armnn/utility/Timer.hpp>
#include <armnn/BackendRegistry.hpp>
#include <armnn/utility/Assert.hpp>
#include <armnn/utility/NumericCast.hpp>

#include <armnnUtils/TContainer.hpp>
#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp"

#include <common/include/ProfilingGuid.hpp>

#if defined(ARMNN_SERIALIZER)
#include "armnnDeserializer/IDeserializer.hpp"
#endif
#if defined(ARMNN_TF_LITE_PARSER)
#include <armnnTfLiteParser/ITfLiteParser.hpp>
#endif
#if defined(ARMNN_ONNX_PARSER)
#include <armnnOnnxParser/IOnnxParser.hpp>
#endif

#include <armnnUtils/Filesystem.hpp>
#include <HeapProfiling.hpp>
#include <TensorIOUtils.hpp>

#include "armnn/utility/StringUtils.hpp"
#include <cxxopts/cxxopts.hpp>
#include "CxxoptsUtils.hpp"
#include <fmt/format.h>
#include <mapbox/variant.hpp>

#include <algorithm>
#include <iterator>
#include <fstream>
#include <map>
#include <string>
#include <vector>
#include <type_traits>

namespace InferenceModelInternal
{
using BindingPointInfo = armnn::BindingPointInfo;

using QuantizationParams = std::pair<float, int32_t>;

struct Params
{
    std::string                     m_ModelPath;
    std::vector<std::string>        m_InputBindings;
    std::vector<armnn::TensorShape> m_InputShapes;
    std::vector<std::string>        m_OutputBindings;
    std::vector<armnn::BackendId>   m_ComputeDevices;
    std::string                     m_DynamicBackendsPath;
    size_t                          m_SubgraphId;
    bool                            m_AllowExpandedDims;
    bool                            m_IsModelBinary;
    bool                            m_VisualizePostOptimizationModel;
    bool                            m_EnableFp16TurboMode;
    bool                            m_EnableBf16TurboMode;
    bool                            m_PrintIntermediateLayers;
    bool                            m_PrintIntermediateLayersToFile;
    bool                            m_ParseUnsupported;
    bool                            m_InferOutputShape;
    bool                            m_EnableFastMath;
    bool                            m_SaveCachedNetwork;
    bool                            m_OutputDetailsToStdOut;
    bool                            m_OutputDetailsOnlyToStdOut;
    std::string                     m_CachedNetworkFilePath;
    unsigned int                    m_NumberOfThreads;
    std::string                     m_MLGOTuningFilePath;
    bool                            m_AsyncEnabled;
    size_t                          m_ThreadPoolSize;
    bool                            m_ImportInputsIfAligned;

    Params()
        : m_ComputeDevices{}
        , m_SubgraphId(0)
        , m_AllowExpandedDims(false)
        , m_IsModelBinary(true)
        , m_VisualizePostOptimizationModel(false)
        , m_EnableFp16TurboMode(false)
        , m_EnableBf16TurboMode(false)
        , m_PrintIntermediateLayers(false)
        , m_PrintIntermediateLayersToFile(false)
        , m_ParseUnsupported(false)
        , m_InferOutputShape(false)
        , m_EnableFastMath(false)
        , m_SaveCachedNetwork(false)
        , m_OutputDetailsToStdOut(false)
        , m_OutputDetailsOnlyToStdOut(false)
        , m_CachedNetworkFilePath("")
        , m_NumberOfThreads(0)
        , m_MLGOTuningFilePath("")
        , m_AsyncEnabled(false)
        , m_ThreadPoolSize(0)
        , m_ImportInputsIfAligned(false)
    {}
};

} // namespace InferenceModelInternal
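
// A minimal usage sketch for Params, assuming a binary TfLite model with one
// input and one output; the file name and binding names below are illustrative
// placeholders:
//
//     InferenceModelInternal::Params params;
//     params.m_ModelPath      = "model.tflite";
//     params.m_InputBindings  = { "input" };
//     params.m_OutputBindings = { "output" };
//     params.m_ComputeDevices = { armnn::Compute::CpuRef };
//     params.m_IsModelBinary  = true;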

template <typename IParser>
struct CreateNetworkImpl
{
public:
    using Params = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        std::map<std::string, armnn::TensorShape> inputShapes;
        if (!params.m_InputShapes.empty())
        {
            const size_t numInputShapes   = params.m_InputShapes.size();
            const size_t numInputBindings = params.m_InputBindings.size();
            if (numInputShapes < numInputBindings)
            {
                throw armnn::Exception(fmt::format(
                    "Not every input has its tensor shape specified: expected={0}, got={1}",
                    numInputBindings, numInputShapes));
            }

            for (size_t i = 0; i < numInputShapes; i++)
            {
                inputShapes[params.m_InputBindings[i]] = params.m_InputShapes[i];
            }
        }

        std::vector<std::string> requestedOutputs = params.m_OutputBindings;
        armnn::INetworkPtr network{nullptr, [](armnn::INetwork*){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            // Handle text and binary input differently by calling the corresponding parser function
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes, requestedOutputs) :
                parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes, requestedOutputs));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            inputBindings.push_back(parser->GetNetworkInputBindingInfo(inputLayerName));
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            outputBindings.push_back(parser->GetNetworkOutputBindingInfo(outputLayerName));
        }

        return network;
    }
};
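
// Sketch of how a parser-specific specialization is typically invoked (here
// with the TfLite parser, assuming ARMNN_TF_LITE_PARSER is defined and a
// `params` instance populated as shown above):
//
//     std::vector<armnn::BindingPointInfo> inputBindings;
//     std::vector<armnn::BindingPointInfo> outputBindings;
//     armnn::INetworkPtr network =
//         CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>::Create(params, inputBindings, outputBindings);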

#if defined(ARMNN_SERIALIZER)
template <>
struct CreateNetworkImpl<armnnDeserializer::IDeserializer>
{
public:
    using IParser = armnnDeserializer::IDeserializer;
    using Params  = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        auto parser(IParser::Create());
        ARMNN_ASSERT(parser);

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork*){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");

            std::error_code errorCode;
            fs::path pathToFile(params.m_ModelPath);
            if (!fs::exists(pathToFile, errorCode))
            {
                throw armnn::FileNotFoundException(fmt::format("Cannot find the file ({0}) errorCode: {1} {2}",
                                                               params.m_ModelPath,
                                                               errorCode.message(),
                                                               CHECK_LOCATION().AsString()));
            }
            std::ifstream file(params.m_ModelPath, std::ios::binary);

            network = parser->CreateNetworkFromBinary(file);
        }

        unsigned int subgraphId = armnn::numeric_cast<unsigned int>(params.m_SubgraphId);

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            armnnDeserializer::BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(subgraphId, inputLayerName);
            inputBindings.push_back(std::make_pair(inputBinding.m_BindingId, inputBinding.m_TensorInfo));
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            armnnDeserializer::BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(subgraphId, outputLayerName);
            outputBindings.push_back(std::make_pair(outputBinding.m_BindingId, outputBinding.m_TensorInfo));
        }

        return network;
    }
};
#endif

#if defined(ARMNN_TF_LITE_PARSER)
template <>
struct CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>
{
public:
    using IParser = armnnTfLiteParser::ITfLiteParser;
    using Params  = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        IParser::TfLiteParserOptions options;
        options.m_AllowExpandedDims          = params.m_AllowExpandedDims;
        options.m_StandInLayerForUnsupported = params.m_ParseUnsupported;
        options.m_InferAndValidate           = params.m_InferOutputShape;
        auto parser(IParser::Create(options));

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork*){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            armnn::BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(params.m_SubgraphId, inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            armnn::BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(params.m_SubgraphId, outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif

#if defined(ARMNN_ONNX_PARSER)
template <>
struct CreateNetworkImpl<armnnOnnxParser::IOnnxParser>
{
public:
    using IParser          = armnnOnnxParser::IOnnxParser;
    using Params           = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<BindingPointInfo>& inputBindings,
                                     std::vector<BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork*){}};

        std::map<std::string, armnn::TensorShape> inputShapes;
        if (!params.m_InputShapes.empty())
        {
            const size_t numInputShapes   = params.m_InputShapes.size();
            const size_t numInputBindings = params.m_InputBindings.size();
            if (numInputShapes < numInputBindings)
            {
                throw armnn::Exception(fmt::format(
                    "Not every input has its tensor shape specified: expected={0}, got={1}",
                    numInputBindings, numInputShapes));
            }

            for (size_t i = 0; i < numInputShapes; i++)
            {
                inputShapes[params.m_InputBindings[i]] = params.m_InputShapes[i];
            }

            {
                ARMNN_SCOPED_HEAP_PROFILING("Parsing");
                network = (params.m_IsModelBinary ?
                    parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes) :
                    parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes));
            }
        }
        else
        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str()) :
                parser->CreateNetworkFromTextFile(modelPath.c_str()));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            BindingPointInfo inputBinding = parser->GetNetworkInputBindingInfo(inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            BindingPointInfo outputBinding = parser->GetNetworkOutputBindingInfo(outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif


template <typename IParser, typename TDataType>
class InferenceModel
{
public:
    using DataType           = TDataType;
    using Params             = InferenceModelInternal::Params;
    using QuantizationParams = InferenceModelInternal::QuantizationParams;

    struct CommandLineOptions
    {
        std::string m_ModelDir;
        std::vector<std::string> m_ComputeDevices;
        std::string m_DynamicBackendsPath;
        bool m_VisualizePostOptimizationModel;
        bool m_EnableFp16TurboMode;
        bool m_EnableBf16TurboMode;
        std::string m_Labels;

        std::vector<armnn::BackendId> GetComputeDevicesAsBackendIds()
        {
            std::vector<armnn::BackendId> backendIds;
            std::copy(m_ComputeDevices.begin(), m_ComputeDevices.end(), std::back_inserter(backendIds));
            return backendIds;
        }
    };

    static void AddCommandLineOptions(cxxopts::Options& options,
                                      CommandLineOptions& cLineOptions,
                                      std::vector<std::string>& required)
    {
        const std::vector<std::string> defaultComputes = { "CpuAcc", "CpuRef" };

        const std::string backendsMessage = "Which device to run layers on by default. Possible choices: "
                                          + armnn::BackendRegistryInstance().GetBackendIdsAsString();

        options
            .allow_unrecognised_options()
            .add_options()
            ("m,model-dir", "Path to directory containing model files (.prototxt/.tflite)",
             cxxopts::value<std::string>(cLineOptions.m_ModelDir))
            ("c,compute", backendsMessage.c_str(),
             cxxopts::value<std::vector<std::string>>(cLineOptions.m_ComputeDevices)->default_value("CpuRef"))
            ("b,dynamic-backends-path",
             "Path where to load any available dynamic backend from. "
             "If left empty (the default), dynamic backends will not be used.",
             cxxopts::value(cLineOptions.m_DynamicBackendsPath))
            ("l,labels",
             "Text file containing one image filename - correct label pair per line, "
             "used to test the accuracy of the network.",
             cxxopts::value<std::string>(cLineOptions.m_Labels))
            ("v,visualize-optimized-model",
             "Produce a dot file useful for visualizing the graph post optimization. "
             "The file will have the same name as the model with the .dot extension.",
             cxxopts::value<bool>(cLineOptions.m_VisualizePostOptimizationModel)->default_value("false"))
            ("fp16-turbo-mode",
             "If this option is enabled, FP32 layers, weights and biases will be converted "
             "to FP16 where the backend supports it.",
             cxxopts::value<bool>(cLineOptions.m_EnableFp16TurboMode)->default_value("false"))
            ("bf16-turbo-mode",
             "If this option is enabled, FP32 layers, weights and biases will be converted "
             "to BF16 where the backend supports it.",
             cxxopts::value<bool>(cLineOptions.m_EnableBf16TurboMode)->default_value("false"));

        required.emplace_back("model-dir");
    }
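
    // A rough sketch of how this helper might be wired up by a caller, assuming a
    // standard cxxopts parse loop; the program name, the `Parser` type and the
    // manual handling of `required` are illustrative, not prescribed by this header:
    //
    //     cxxopts::Options options("InferenceTest", "Run inference on a model");
    //     CommandLineOptions cLineOptions;
    //     std::vector<std::string> required;
    //     InferenceModel<Parser, float>::AddCommandLineOptions(options, cLineOptions, required);
    //     auto result = options.parse(argc, argv);
    //     for (const auto& opt : required)
    //     {
    //         if (result.count(opt) == 0) { /* report the missing option and exit */ }
    //     }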

    InferenceModel(const Params& params,
                   bool enableProfiling,
                   const std::string& dynamicBackendsPath,
                   const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
        : m_EnableProfiling(enableProfiling),
          m_ProfilingDetailsMethod(armnn::ProfilingDetailsMethod::Undefined),
          m_DynamicBackendsPath(dynamicBackendsPath),
          m_ImportInputsIfAligned(params.m_ImportInputsIfAligned)
    {
        if (runtime)
        {
            m_Runtime = runtime;
        }
        else
        {
            armnn::IRuntime::CreationOptions options;
            options.m_EnableGpuProfiling  = m_EnableProfiling;
            options.m_DynamicBackendsPath = m_DynamicBackendsPath;
            m_Runtime = armnn::IRuntime::Create(options);
        }

        // Configure the Profiler if the profiling details are opted for
        if (params.m_OutputDetailsOnlyToStdOut)
            m_ProfilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsOnly;
        else if (params.m_OutputDetailsToStdOut)
            m_ProfilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsWithEvents;

        std::string invalidBackends;
        if (!CheckRequestedBackendsAreValid(params.m_ComputeDevices, armnn::Optional<std::string&>(invalidBackends)))
        {
            throw armnn::Exception("Some backend IDs are invalid: " + invalidBackends);
        }

        armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};
        {
            const auto parsing_start_time = armnn::GetTimeNow();
            armnn::INetworkPtr network = CreateNetworkImpl<IParser>::Create(params, m_InputBindings, m_OutputBindings);

            ARMNN_LOG(info) << "Network parsing time: " << std::setprecision(2)
                            << std::fixed << armnn::GetTimeDuration(parsing_start_time).count() << " ms.";

            ARMNN_SCOPED_HEAP_PROFILING("Optimizing");

            armnn::OptimizerOptionsOpaque options;
            options.SetReduceFp32ToFp16(params.m_EnableFp16TurboMode);
            options.SetDebugEnabled(params.m_PrintIntermediateLayers);
            options.SetDebugToFileEnabled(params.m_PrintIntermediateLayersToFile);
            options.SetShapeInferenceMethod(params.m_InferOutputShape ?
                armnn::ShapeInferenceMethod::InferAndValidate : armnn::ShapeInferenceMethod::ValidateOnly);
            options.SetProfilingEnabled(m_EnableProfiling);

            armnn::BackendOptions gpuAcc("GpuAcc",
            {
                { "FastMathEnabled", params.m_EnableFastMath },
                { "SaveCachedNetwork", params.m_SaveCachedNetwork },
                { "CachedNetworkFilePath", params.m_CachedNetworkFilePath },
                { "MLGOTuningFilePath", params.m_MLGOTuningFilePath }
            });

            armnn::BackendOptions cpuAcc("CpuAcc",
            {
                { "FastMathEnabled", params.m_EnableFastMath },
                { "NumberOfThreads", params.m_NumberOfThreads }
            });
            options.AddModelOption(gpuAcc);
            options.AddModelOption(cpuAcc);

            const auto optimization_start_time = armnn::GetTimeNow();
            optNet = armnn::Optimize(*network, params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);

            ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2)
                            << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms.";

            if (!optNet)
            {
                throw armnn::Exception("Optimize returned nullptr");
            }
        }

        if (params.m_VisualizePostOptimizationModel)
        {
            fs::path filename = params.m_ModelPath;
            filename.replace_extension("dot");
            std::fstream file(filename.c_str(), std::ios_base::out);
            optNet->SerializeToDot(file);
        }

        armnn::Status ret;
        {
            ARMNN_SCOPED_HEAP_PROFILING("LoadNetwork");

            const auto loading_start_time = armnn::GetTimeNow();
            armnn::INetworkProperties networkProperties(params.m_AsyncEnabled,
                                                        armnn::MemorySource::Undefined,
                                                        armnn::MemorySource::Undefined,
                                                        enableProfiling,
                                                        m_ProfilingDetailsMethod);
            std::string errorMessage;
            ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet), errorMessage, networkProperties);

            ARMNN_LOG(info) << "Network loading time: " << std::setprecision(2)
                            << std::fixed << armnn::GetTimeDuration(loading_start_time).count() << " ms.";

#if !defined(ARMNN_DISABLE_THREADS)
            if (params.m_AsyncEnabled && params.m_ThreadPoolSize > 0)
            {
                std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
                for (size_t i = 0; i < params.m_ThreadPoolSize; ++i)
                {
                    memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(m_NetworkIdentifier));
                }

                m_Threadpool = std::make_unique<armnn::Threadpool>(params.m_ThreadPoolSize,
                                                                   m_Runtime.get(),
                                                                   memHandles);
            }
#endif
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::LoadNetwork failed");
        }
    }
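
    // Construction sketch, assuming TfLite parser support is compiled in and
    // `params` has been populated as shown earlier (profiling disabled, no
    // dynamic backend path, default runtime):
    //
    //     using TfLiteModel = InferenceModel<armnnTfLiteParser::ITfLiteParser, float>;
    //     TfLiteModel model(params, /*enableProfiling=*/false, /*dynamicBackendsPath=*/"");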

    void CheckInputIndexIsValid(unsigned int inputIndex) const
    {
        if (m_InputBindings.size() < inputIndex + 1)
        {
            throw armnn::Exception(fmt::format("Input index out of range: {}", inputIndex));
        }
    }

    void CheckOutputIndexIsValid(unsigned int outputIndex) const
    {
        if (m_OutputBindings.size() < outputIndex + 1)
        {
            throw armnn::Exception(fmt::format("Output index out of range: {}", outputIndex));
        }
    }

    unsigned int GetInputSize(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return m_InputBindings[inputIndex].second.GetNumElements();
    }

    unsigned int GetOutputSize(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex].second.GetNumElements();
    }

    std::chrono::duration<double, std::milli> Run(
        const std::vector<armnnUtils::TContainer>& inputContainers,
        std::vector<armnnUtils::TContainer>& outputContainers)
    {
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = i;
                    throw armnn::Exception(
                        fmt::format("Not enough data for output #{0}: expected "
                                    "{1} elements, got {2}", outputIndex, expectedOutputDataSize, actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);

        // Start timer to record inference time in EnqueueWorkload (in milliseconds)
        const auto start_time = armnn::GetTimeNow();

        armnn::Status ret;
        if (m_ImportInputsIfAligned)
        {
            std::vector<armnn::ImportedInputId> importedInputIds = m_Runtime->ImportInputs(
                m_NetworkIdentifier, MakeInputTensors(inputContainers), armnn::MemorySource::Malloc);

            std::vector<armnn::ImportedOutputId> importedOutputIds = m_Runtime->ImportOutputs(
                m_NetworkIdentifier, MakeOutputTensors(outputContainers), armnn::MemorySource::Malloc);

            ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                             MakeInputTensors(inputContainers),
                                             MakeOutputTensors(outputContainers),
                                             importedInputIds,
                                             importedOutputIds);
        }
        else
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                             MakeInputTensors(inputContainers),
                                             MakeOutputTensors(outputContainers));
        }
        const auto duration = armnn::GetTimeDuration(start_time);

        // if profiling is enabled print out the results
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }
        else
        {
            return duration;
        }
    }
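
    // Invocation sketch for Run(), assuming a single float input and output; the
    // output container must be pre-sized to the number of elements its binding reports:
    //
    //     std::vector<armnnUtils::TContainer> inputs  = { std::vector<float>(model.GetInputSize(0)) };
    //     std::vector<armnnUtils::TContainer> outputs = { std::vector<float>(model.GetOutputSize(0)) };
    //     // ... fill inputs[0] with data ...
    //     auto inferenceTimeMs = model.Run(inputs, outputs);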

    std::tuple<unsigned int, std::chrono::duration<double, std::milli>> RunAsync(
        armnn::experimental::IWorkingMemHandle& workingMemHandleRef,
        const std::vector<armnnUtils::TContainer>& inputContainers,
        std::vector<armnnUtils::TContainer>& outputContainers,
        unsigned int inferenceID)
    {
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = i;
                    throw armnn::Exception(
                        fmt::format("Not enough data for output #{0}: expected "
                                    "{1} elements, got {2}", outputIndex, expectedOutputDataSize, actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);

        // Start timer to record inference time in EnqueueWorkload (in milliseconds)
        const auto start_time = armnn::GetTimeNow();

        armnn::Status ret = m_Runtime->Execute(workingMemHandleRef,
                                               MakeInputTensors(inputContainers),
                                               MakeOutputTensors(outputContainers));

        const auto duration = armnn::GetTimeDuration(start_time);

        // if profiling is enabled print out the results
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception(
                fmt::format("IRuntime::Execute asynchronously failed for network #{0} on inference #{1}",
                            m_NetworkIdentifier, inferenceID));
        }
        else
        {
            return std::make_tuple(inferenceID, duration);
        }
    }

    void RunAsync(const std::vector<armnnUtils::TContainer>& inputContainers,
                  std::vector<armnnUtils::TContainer>& outputContainers,
                  std::shared_ptr<armnn::IAsyncExecutionCallback> cb)
    {
#if !defined(ARMNN_DISABLE_THREADS)
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = i;
                    throw armnn::Exception(
                        fmt::format("Not enough data for output #{0}: expected "
                                    "{1} elements, got {2}", outputIndex, expectedOutputDataSize, actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);

        m_Threadpool->Schedule(m_NetworkIdentifier,
                               MakeInputTensors(inputContainers),
                               MakeOutputTensors(outputContainers),
                               armnn::QosExecPriority::Medium,
                               cb);

        // if profiling is enabled print out the results
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }
#endif
    }

    const armnn::BindingPointInfo& GetInputBindingInfo(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return m_InputBindings[inputIndex];
    }

    const std::vector<armnn::BindingPointInfo>& GetInputBindingInfos() const
    {
        return m_InputBindings;
    }

    const armnn::BindingPointInfo& GetOutputBindingInfo(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex];
    }

    const std::vector<armnn::BindingPointInfo>& GetOutputBindingInfos() const
    {
        return m_OutputBindings;
    }

    QuantizationParams GetQuantizationParams(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return std::make_pair(m_OutputBindings[outputIndex].second.GetQuantizationScale(),
                              m_OutputBindings[outputIndex].second.GetQuantizationOffset());
    }
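
    // Example of applying the returned quantization parameters to convert a raw
    // quantized output value back to a real number (standard affine
    // dequantization; `rawValue` is an illustrative placeholder):
    //
    //     auto [scale, offset] = model.GetQuantizationParams(0);
    //     float real = scale * static_cast<float>(static_cast<int32_t>(rawValue) - offset);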

    QuantizationParams GetInputQuantizationParams(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return std::make_pair(m_InputBindings[inputIndex].second.GetQuantizationScale(),
                              m_InputBindings[inputIndex].second.GetQuantizationOffset());
    }

    std::vector<QuantizationParams> GetAllQuantizationParams() const
    {
        std::vector<QuantizationParams> quantizationParams;
        for (unsigned int i = 0u; i < m_OutputBindings.size(); i++)
        {
            quantizationParams.push_back(GetQuantizationParams(i));
        }
        return quantizationParams;
    }

    std::unique_ptr<armnn::experimental::IWorkingMemHandle> CreateWorkingMemHandle()
    {
        return m_Runtime->CreateWorkingMemHandle(m_NetworkIdentifier);
    }

private:
    armnn::NetworkId m_NetworkIdentifier;
    std::shared_ptr<armnn::IRuntime> m_Runtime;
#if !defined(ARMNN_DISABLE_THREADS)
    std::unique_ptr<armnn::Threadpool> m_Threadpool;
#endif

    std::vector<armnn::BindingPointInfo> m_InputBindings;
    std::vector<armnn::BindingPointInfo> m_OutputBindings;
    bool m_EnableProfiling;
    armnn::ProfilingDetailsMethod m_ProfilingDetailsMethod;
    std::string m_DynamicBackendsPath;
    bool m_ImportInputsIfAligned;

    template<typename TContainer>
    armnn::InputTensors MakeInputTensors(const std::vector<TContainer>& inputDataContainers)
    {
        return armnnUtils::MakeInputTensors(m_InputBindings, inputDataContainers);
    }

    template<typename TContainer>
    armnn::OutputTensors MakeOutputTensors(std::vector<TContainer>& outputDataContainers)
    {
        return armnnUtils::MakeOutputTensors(m_OutputBindings, outputDataContainers);
    }
};