//
// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include "Network.hpp"
#include "LayerFwd.hpp"
#include "Profiling.hpp"

#include <armnn/Tensor.hpp>

#include <armnn/backends/IBackendInternal.hpp>
#include <armnn/backends/IMemoryOptimizerStrategy.hpp>
#include <armnn/backends/Workload.hpp>
#include <armnn/backends/WorkloadFactory.hpp>

#include <backendsCommon/DefaultAllocator.hpp>
#include <backendsCommon/MemoryManager.hpp>
#include <backendsCommon/TensorHandleFactoryRegistry.hpp>
#include <backendsCommon/memoryOptimizerStrategyLibrary/strategies/SingleAxisPriorityList.hpp>

#include <client/include/IProfilingService.hpp>
#include <client/include/TimelineUtilityMethods.hpp>

#include <common/include/LabelsAndEventClasses.hpp>

#include <mutex>
#include <condition_variable>
#include <unordered_map>

namespace cl
{
class Context;
class CommandQueue;
class Device;
}

namespace armnn
{

class LoadedNetwork
{
public:
    using WorkloadQueue = std::vector<std::unique_ptr<IWorkload>>;

    ~LoadedNetwork()
    {
        FreeWorkingMemory();
    }

    /// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have
    /// overlapped execution by calling this function from different threads.
    std::unique_ptr<IWorkingMemHandle> CreateWorkingMemHandle(NetworkId networkId);

    TensorInfo GetInputTensorInfo(LayerBindingId layerId) const;
    TensorInfo GetOutputTensorInfo(LayerBindingId layerId) const;

    std::vector<ImportedInputId> ImportInputs(const InputTensors& inputTensors,
                                              MemorySource forceImportMemorySource = MemorySource::Undefined);
    std::vector<ImportedOutputId> ImportOutputs(const OutputTensors& outputTensors,
                                                MemorySource forceImportMemorySource = MemorySource::Undefined);

    void ClearImportedInputs(const std::vector<ImportedInputId> inputIds);
    void ClearImportedOutputs(const std::vector<ImportedOutputId> outputIds);

    /// Single-threaded execution of the loaded network.
    Status EnqueueWorkload(const InputTensors& inputTensors, const OutputTensors& outputTensors,
                           std::vector<ImportedInputId> preImportedInputIds = {},
                           std::vector<ImportedOutputId> preImportedOutputIds = {});

    /// Thread-safe execution of the loaded network; each calling thread must supply its own WorkingMemHandle.
    Status Execute(const InputTensors& inputTensors,
                   const OutputTensors& outputTensors,
                   IWorkingMemHandle& workingMemHandle,
                   std::vector<ImportedInputId> preImportedInputs = {},
                   std::vector<ImportedOutputId> preImportedOutputs = {});

    static std::unique_ptr<LoadedNetwork> MakeLoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
                                                            std::string& errorMessage,
                                                            const INetworkProperties& networkProperties,
                                                            arm::pipe::IProfilingService* profilingService);

    // NOTE: we return by reference because the sole purpose of this method is to provide
    // access to the private m_Profiler, so there should be no need to increment the
    // shared_ptr's reference counter.
    const std::shared_ptr<IProfiler>& GetProfiler() const { return m_OptimizedNetwork->GetProfiler(); }

    void FreeWorkingMemory();

    void RegisterDebugCallback(const DebugCallbackFunction& func);

    void SendNetworkStructure(arm::pipe::IProfilingService& profilingService);

    bool IsAsyncEnabled()
    {
        return m_NetworkProperties.m_AsyncEnabled;
    }

    arm::pipe::ProfilingGuid GetNetworkGuid();
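
    // Illustrative usage sketch (comment only, not part of this header's API surface).
    // It assumes an already-optimized network `optNet`, populated `inputTensors` /
    // `outputTensors`, and a `networkId`; in practice clients normally reach this class
    // through IRuntime::LoadNetwork rather than constructing it directly.
    //
    //     std::string error;
    //     INetworkProperties props = ...; // configure async mode and import sources
    //     auto loaded = LoadedNetwork::MakeLoadedNetwork(std::move(optNet), error, props, nullptr);
    //
    //     // Single-threaded path: one blocking call performs input copy, execution and output copy.
    //     loaded->EnqueueWorkload(inputTensors, outputTensors);
    //
    //     // Overlapped path: create one WorkingMemHandle per thread, then call the
    //     // thread-safe Execute() with that handle.
    //     auto memHandle = loaded->CreateWorkingMemHandle(networkId);
    //     loaded->Execute(inputTensors, outputTensors, *memHandle);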
private:
    void AllocateWorkingMemory(
#if !defined(ARMNN_DISABLE_THREADS)
        std::lock_guard<std::mutex>& lock
#endif
        );
    void AllocateAndExecuteConstantWorkloads();
    void AllocateAndExecuteConstantWorkloadsAsync();

    std::unordered_map<LayerGuid, std::unique_ptr<IWorkload>> m_ConstantWorkloads;
    std::unordered_map<LayerGuid, ITensorHandle*> m_ConstantTensorHandles;

    std::unique_ptr<IMemoryOptimizerStrategy> m_ConstantStrategy = std::make_unique<SingleAxisPriorityList>();

    LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
                  const INetworkProperties& networkProperties,
                  arm::pipe::IProfilingService* profilingService);

    void EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo);

    void EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo);

    void EnqueueInput(const ConstTensor& inputTensor, ITensorHandle* inputTensorHandle);

    void ImportOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle);

    bool Execute(std::unique_ptr<arm::pipe::TimelineUtilityMethods>& timelineUtils,
                 arm::pipe::ProfilingGuid inferenceGuid);

    const IWorkloadFactory& GetWorkloadFactory(const Layer& layer) const;

    inline LayerBindingId ValidateImportedInputID(ImportedInputId id);
    inline LayerBindingId ValidateImportedOutputID(ImportedOutputId id);

    void CreateMemoryProfile();
    void CreateMemoryProfileAsync();

    std::unique_ptr<MemoryManager> CreateExternalMemoryManager(
        std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>>& tensorMemory);

    using BackendPtrMap = std::unordered_map<BackendId, IBackendInternalUniquePtr>;

    BackendPtrMap m_Backends;
    std::vector<IBackendInternal::IMemoryManagerSharedPtr> m_BackendMemoryManagers;

    using WorkloadFactoryMap = std::unordered_map<BackendId, IBackendInternal::IWorkloadFactoryPtr>;
    WorkloadFactoryMap m_WorkloadFactories;

    std::unique_ptr<IOptimizedNetwork> m_OptimizedNetwork;

    WorkloadQueue m_InputQueue;
    WorkloadQueue m_WorkloadQueue;
    WorkloadQueue m_OutputQueue;

#if !defined(ARMNN_DISABLE_THREADS)
    mutable std::mutex m_WorkingMemMutex;
#endif

    bool m_IsWorkingMemAllocated = false;

    INetworkProperties m_NetworkProperties;

    TensorHandleFactoryRegistry m_TensorHandleFactoryRegistry;

    // NOTE: raw pointer because the profiling service is owned and controlled by the Runtime.
    arm::pipe::IProfilingService* m_ProfilingService;

    // RAII pin for an imported tensor handle: the handle is automatically unimported
    // when the pin is destroyed.
    struct ImportedTensorHandlePin
    {
        ImportedTensorHandlePin()
        {}

        ImportedTensorHandlePin(LayerBindingId layerBindingId,
                                std::unique_ptr<ITensorHandle> tensorHandle)
            : m_LayerBindingId(layerBindingId)
            , m_TensorHandle(std::move(tensorHandle))
        {}

        ImportedTensorHandlePin(ImportedTensorHandlePin&&) = default;

        ~ImportedTensorHandlePin()
        {
            if (m_TensorHandle)
            {
                m_TensorHandle->Unimport();
            }
        }

        LayerBindingId m_LayerBindingId = 0;
        std::unique_ptr<ITensorHandle> m_TensorHandle;
    };

    std::vector<ImportedTensorHandlePin> m_PreImportedInputHandles;
    std::vector<ImportedTensorHandlePin> m_PreImportedOutputHandles;
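
    // Illustrative import lifecycle sketch (comment only). The assumption here, based on the
    // declarations above, is that each successful ImportInputs call creates one pin per tensor
    // and the returned ImportedInputId values index into m_PreImportedInputHandles.
    //
    //     auto ids = loaded->ImportInputs(inputTensors, MemorySource::Malloc);
    //     loaded->EnqueueWorkload({}, outputTensors, ids); // pre-imported ids stand in for inputTensors
    //     loaded->ClearImportedInputs(ids); // destroys the pins; Unimport() runs in ~ImportedTensorHandlePin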

    ImportedInputId m_CurImportedInputId = 0;
    ImportedOutputId m_CurImportedOutputId = 0;

    std::unordered_map<BackendId, std::vector<MemBlock>> m_MemBlockMap;
    std::unordered_map<BackendId, std::vector<MemBin>> m_MemBinMap;

    std::vector<ITensorHandle*> m_TensorHandles;

    std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>> m_TensorMemory;

    std::unique_ptr<MemoryManager> m_ExternalMemoryManager;

    std::unordered_map<BackendId, bool> m_SupportsExternallyManagedMemory;

    // A set of vectors recording the workload queue indexes and their corresponding
    // input/output slot indexes which are connected to the inputs and outputs of the network.
    struct WorkloadIndices
    {
        unsigned int m_WorkloadIndex;
        unsigned int m_SlotIndex;
    };

    struct OutputWorkloadIndices
    {
        WorkloadIndices m_OutputSlotIndices;
        std::vector<WorkloadIndices> m_InputSlotIndices;
    };

    std::unordered_map<LayerBindingId, std::vector<WorkloadIndices>> m_InputWorkloadSlotPairs;
    std::unordered_map<LayerBindingId, OutputWorkloadIndices> m_OutputWorkloadSlotPairs;
    std::vector<bool> m_IsInputImported;
    std::vector<bool> m_IsOutputImported;
};

} // namespace armnn