//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include "BackendOptions.hpp"
#include "INetwork.hpp"
#include "IProfiler.hpp"
#include "IWorkingMemHandle.hpp"
#include "IAsyncExecutionCallback.hpp"
#include "Tensor.hpp"
#include "Types.hpp"
#include "TypesUtils.hpp"

#include <armnn/backends/ICustomAllocator.hpp>
#include <armnn/backends/IMemoryOptimizerStrategy.hpp>

#include <memory>
#include <map>

namespace arm
{

namespace pipe
{
class ILocalPacketHandler;
using ILocalPacketHandlerSharedPtr = std::shared_ptr<ILocalPacketHandler>;
} // pipe
} // arm

namespace armnn
{

using NetworkId = int;

class IGpuAccTunedParameters;

struct RuntimeImpl;
class IRuntime;
using IRuntimePtr = std::unique_ptr<IRuntime, void(*)(IRuntime* runtime)>;

struct INetworkProperties
{
    INetworkProperties(bool asyncEnabled,
                       MemorySource inputSource,
                       MemorySource outputSource,
                       bool profilingEnabled = false,
                       ProfilingDetailsMethod detailsMethod = ProfilingDetailsMethod::Undefined,
                       bool externalMemoryManagementEnabled = false)
        : m_ImportEnabled(inputSource != MemorySource::Undefined),
          m_ExportEnabled(outputSource != MemorySource::Undefined),
          m_AsyncEnabled(asyncEnabled),
          m_ProfilingEnabled(profilingEnabled),
          m_OutputNetworkDetailsMethod(detailsMethod),
          m_InputSource(inputSource),
          m_OutputSource(outputSource),
          m_ExternalMemoryManagementEnabled(externalMemoryManagementEnabled)
    {}

    /// Deprecated and will be removed in a future release.
    const bool m_ImportEnabled;
    /// Deprecated and will be removed in a future release.
    const bool m_ExportEnabled;

    const bool m_AsyncEnabled;

    const bool m_ProfilingEnabled;

    const ProfilingDetailsMethod m_OutputNetworkDetailsMethod;

    const MemorySource m_InputSource;
    const MemorySource m_OutputSource;

    const bool m_ExternalMemoryManagementEnabled;

    virtual ~INetworkProperties() {}
};
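
/// Example: a minimal sketch of constructing an INetworkProperties instance. The chosen
/// MemorySource values are illustrative; MemorySource::Undefined disables import/export,
/// while a value such as MemorySource::Malloc requests zero-copy import of caller-owned
/// memory (subject to backend support).
/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
/// // Synchronous execution, importing inputs from and exporting outputs to malloc'd memory.
/// armnn::INetworkProperties properties(false,                        // asyncEnabled
///                                      armnn::MemorySource::Malloc,  // inputSource
///                                      armnn::MemorySource::Malloc); // outputSource
/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~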

using namespace armnn::experimental;

class IRuntime
{
public:
    struct CreationOptions
    {
        CreationOptions()
            : m_GpuAccTunedParameters(nullptr)
            , m_EnableGpuProfiling(false)
            , m_DynamicBackendsPath("")
            , m_ProtectedMode(false)
            , m_CustomAllocatorMap()
            , m_MemoryOptimizerStrategyMap()
        {}

        /// If set, uses the GpuAcc tuned parameters from the given object when executing GPU workloads.
        /// It will also be updated with new tuned parameters if it is configured to do so.
        std::shared_ptr<IGpuAccTunedParameters> m_GpuAccTunedParameters;

        /// Setting this flag will allow the user to obtain GPU profiling information from the runtime.
        bool m_EnableGpuProfiling;

        /// Setting this value will override the paths set by the DYNAMIC_BACKEND_PATHS compiler directive.
        /// Only a single path is allowed for the override.
        /// It defines the path to search for any [dynamic backend libraries](src/dynamic/README.md).
        std::string m_DynamicBackendsPath;

        /// Setting this flag will allow the user to create the Runtime in protected mode.
        /// It will run all the inferences on protected memory and requires that
        /// INetworkProperties::m_ImportEnabled is set to true with the MemorySource::DmaBufProtected option.
        /// This requires a backend that supports Protected Memory and has an allocator capable of
        /// allocating Protected Memory associated with it.
        bool m_ProtectedMode;

        /// @brief A map to define a custom memory allocator for specific backend Ids.
        ///
        /// @details A Custom Allocator is used for allocation of working memory in the backends.
        /// Set this if you need to take control of how memory is allocated on a backend. Required for
        /// Protected Mode in order to correctly allocate Protected Memory.
        ///
        /// @note Only supported for GpuAcc.
        std::map<BackendId, std::shared_ptr<ICustomAllocator>> m_CustomAllocatorMap;

        /// @brief A map to define a custom memory optimizer strategy for specific backend Ids.
        ///
        /// @details A Memory Optimizer Strategy provides a solution to an abstract representation of
        /// a network's memory requirements. This can also be used to return a pre-computed solution
        /// for a specific network. Set this if you want to implement a Custom Memory Optimizer Strategy
        /// for a given backend.
        std::map<BackendId, std::shared_ptr<IMemoryOptimizerStrategy>> m_MemoryOptimizerStrategyMap;

        struct ExternalProfilingOptions
        {
            ExternalProfilingOptions()
                : m_EnableProfiling(false)
                , m_TimelineEnabled(false)
                , m_OutgoingCaptureFile("")
                , m_IncomingCaptureFile("")
                , m_FileOnly(false)
                , m_CapturePeriod(LOWEST_CAPTURE_PERIOD)
                , m_FileFormat("binary")
                , m_LocalPacketHandlers()
            {}

            /// Indicates whether external profiling is enabled or not.
            bool m_EnableProfiling;
            /// Indicates whether external timeline profiling is enabled or not.
            bool m_TimelineEnabled;
            /// Path to a file in which outgoing timeline profiling messages will be stored.
            std::string m_OutgoingCaptureFile;
            /// Path to a file in which incoming timeline profiling messages will be stored.
            std::string m_IncomingCaptureFile;
            /// Enable profiling output to file only.
            bool m_FileOnly;
            /// The duration at which captured profiling messages will be flushed.
            uint32_t m_CapturePeriod;
            /// The format of the file used for outputting profiling data.
            std::string m_FileFormat;
            std::vector<arm::pipe::ILocalPacketHandlerSharedPtr> m_LocalPacketHandlers;
        };
        ExternalProfilingOptions m_ProfilingOptions;

        /// Pass backend specific options.
        ///
        /// For example, to enable GpuAcc tuning add the following:
        /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
        /// m_BackendOptions.emplace_back(
        ///     BackendOptions{"GpuAcc",
        ///                    {
        ///                        {"TuningLevel", 2},
        ///                        {"TuningFile", filename},
        ///                        {"MemoryOptimizerStrategy", strategyname}
        ///                    }
        ///     });
        /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        /// Execute representative workloads through the runtime to generate tuning data.
        /// The tuning file is written once the runtime is destroyed.
        ///
        /// To execute with the tuning data, start up with just the tuning file specified:
        /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
        /// m_BackendOptions.emplace_back(
        ///     BackendOptions{"GpuAcc",
        ///                    {
        ///                        {"TuningFile", filename}
        ///                    }
        ///     });
        /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        ///
        /// The following backend options are available:
        /// AllBackends:
        ///   "MemoryOptimizerStrategy" : string [strategynameString]
        ///     (Existing Memory Optimizer Strategies: ConstantMemoryStrategy)
        /// GpuAcc:
        ///   "TuningLevel" : int [0..3] (0=UseOnly(default) | 1=RapidTuning | 2=NormalTuning | 3=ExhaustiveTuning)
        ///   "TuningFile" : string [filenameString]
        ///   "KernelProfilingEnabled" : bool [true | false]
        std::vector<BackendOptions> m_BackendOptions;
    };
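
    /// Example: a minimal sketch of creating a runtime configured for GpuAcc tuning,
    /// following the snippets above. The option values and tuning file path are
    /// illustrative only; "GpuAcc" must be a backend that is actually available.
    /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
    /// armnn::IRuntime::CreationOptions options;
    /// options.m_BackendOptions.emplace_back(
    ///     armnn::BackendOptions{"GpuAcc",
    ///                           {
    ///                               {"TuningLevel", 1},                   // RapidTuning
    ///                               {"TuningFile", "/tmp/gpu_tuning.bin"} // written on runtime destruction
    ///                           }
    ///     });
    /// armnn::IRuntimePtr runtime = armnn::IRuntime::Create(options);
    /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~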

    static IRuntime* CreateRaw(const CreationOptions& options);
    static IRuntimePtr Create(const CreationOptions& options);
    static void Destroy(IRuntime* runtime);

    /// Loads a complete network into the IRuntime.
    /// @param [out] networkIdOut - Unique identifier for the network is returned in this reference.
    /// @param [in] network - Complete network to load into the IRuntime.
    /// The runtime takes ownership of the network once passed in.
    /// @return armnn::Status
    Status LoadNetwork(NetworkId& networkIdOut, IOptimizedNetworkPtr network);

    /// Loads a complete network into the IRuntime.
    /// @param [out] networkIdOut Unique identifier for the network is returned in this reference.
    /// @param [in] network Complete network to load into the IRuntime.
    /// @param [out] errorMessage Error message if there were any errors.
    /// The runtime takes ownership of the network once passed in.
    /// @return armnn::Status
    Status LoadNetwork(NetworkId& networkIdOut,
                       IOptimizedNetworkPtr network,
                       std::string& errorMessage);

    Status LoadNetwork(NetworkId& networkIdOut,
                       IOptimizedNetworkPtr network,
                       std::string& errorMessage,
                       const INetworkProperties& networkProperties);

    TensorInfo GetInputTensorInfo(NetworkId networkId, LayerBindingId layerId) const;
    TensorInfo GetOutputTensorInfo(NetworkId networkId, LayerBindingId layerId) const;
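
    /// Example: a minimal sketch of loading an optimized network. The call to
    /// armnn::Optimize(), the backend preference list, and the `network` and `runtime`
    /// variables are illustrative assumptions; error handling is reduced to a status check.
    /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
    /// armnn::IOptimizedNetworkPtr optNet =
    ///     armnn::Optimize(*network, {"CpuAcc", "CpuRef"}, runtime->GetDeviceSpec());
    ///
    /// armnn::NetworkId networkId = 0;
    /// std::string errorMessage;
    /// if (runtime->LoadNetwork(networkId, std::move(optNet), errorMessage) != armnn::Status::Success)
    /// {
    ///     // errorMessage describes why loading failed.
    /// }
    /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~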

    /// ImportInputs separates the importing and mapping of InputTensors from network execution,
    /// allowing a set of InputTensors to be imported and mapped once but used in execution many times.
    /// This function is not thread safe and must not be used while other threads are calling Execute().
    /// No exceptions are thrown for failed imports. It is the caller's responsibility to check whether
    /// tensors have been successfully imported by comparing returned ids with those passed in the InputTensors.
    /// Whether a tensor can be imported or not is backend specific.
    std::vector<ImportedInputId> ImportInputs(NetworkId networkId, const InputTensors& inputTensors,
                                              MemorySource forceImportMemorySource = MemorySource::Undefined);

    /// ImportOutputs separates the importing and mapping of OutputTensors from network execution,
    /// allowing a set of OutputTensors to be imported and mapped once but used in execution many times.
    /// This function is not thread safe and must not be used while other threads are calling Execute().
    /// No exceptions are thrown for failed imports. It is the caller's responsibility to check whether
    /// tensors have been successfully imported by comparing returned ids with those passed in the OutputTensors.
    /// Whether a tensor can be imported or not is backend specific.
    std::vector<ImportedOutputId> ImportOutputs(NetworkId networkId, const OutputTensors& outputTensors,
                                                MemorySource forceImportMemorySource = MemorySource::Undefined);

    /// Un-imports and deletes the imported InputTensor(s).
    /// This function is not thread safe and must not be used while other threads are calling Execute().
    /// Only compatible with AsyncEnabled networks.
    void ClearImportedInputs(NetworkId networkId, const std::vector<ImportedInputId> inputIds);

    /// Un-imports and deletes the imported OutputTensor(s).
    /// This function is not thread safe and must not be used while other threads are calling Execute().
    /// Only compatible with AsyncEnabled networks.
    void ClearImportedOutputs(NetworkId networkId, const std::vector<ImportedOutputId> outputIds);

    /// Evaluates a network using input in inputTensors and outputs filled into outputTensors.
    Status EnqueueWorkload(NetworkId networkId,
                           const InputTensors& inputTensors,
                           const OutputTensors& outputTensors,
                           std::vector<ImportedInputId> preImportedInputIds = {},
                           std::vector<ImportedOutputId> preImportedOutputIds = {});

    /// This is an experimental function.
    /// Evaluates a network using input in inputTensors and outputs filled into outputTensors.
    /// This function performs a thread safe execution of the network and returns once execution is complete.
    /// It will block until this and any other thread using the same workingMem object completes.
    Status Execute(IWorkingMemHandle& workingMemHandle,
                   const InputTensors& inputTensors,
                   const OutputTensors& outputTensors,
                   std::vector<ImportedInputId> preImportedInputs = {},
                   std::vector<ImportedOutputId> preImportedOutputs = {});

    /// Unloads a network from the IRuntime.
    /// At the moment this only removes the network from the m_Impl->m_Network.
    /// This might need more work in the future to be AndroidNN compliant.
    /// @param [in] networkId - Unique identifier for the network to be unloaded. Generated in LoadNetwork().
    /// @return armnn::Status
    Status UnloadNetwork(NetworkId networkId);

    const IDeviceSpec& GetDeviceSpec() const;

    /// Creates a new unique WorkingMemHandle object. Create multiple handles if you wish to have
    /// overlapped Execution by calling this function from different threads.
    std::unique_ptr<IWorkingMemHandle> CreateWorkingMemHandle(NetworkId networkId);

    /// Gets the profiler corresponding to the given network id.
    /// @param networkId The id of the network for which to get the profiler.
    /// @return A pointer to the requested profiler, or nullptr if not found.
    const std::shared_ptr<IProfiler> GetProfiler(NetworkId networkId) const;

    /// Registers a callback function to debug layers performing custom computations on intermediate tensors.
    /// @param networkId The id of the network to register the callback.
    /// @param func Callback function to pass to the debug layer.
    void RegisterDebugCallback(NetworkId networkId, const DebugCallbackFunction& func);

protected:
    IRuntime();
    IRuntime(const IRuntime::CreationOptions& options);
    ~IRuntime();

    std::unique_ptr<RuntimeImpl> pRuntimeImpl;
};
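
/// Example: a minimal sketch of a synchronous inference via EnqueueWorkload(). The binding
/// ids, element counts, and float data type are illustrative; in practice they come from
/// the parsed network. Note that recent Arm NN versions require the input TensorInfo to be
/// marked constant before wrapping it in a ConstTensor.
/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
/// armnn::TensorInfo inputInfo  = runtime->GetInputTensorInfo(networkId, 0);
/// armnn::TensorInfo outputInfo = runtime->GetOutputTensorInfo(networkId, 0);
/// inputInfo.SetConstant(true); // required by recent Arm NN versions for ConstTensor
///
/// std::vector<float> inputData(inputInfo.GetNumElements());
/// std::vector<float> outputData(outputInfo.GetNumElements());
///
/// armnn::InputTensors  inputTensors { {0, armnn::ConstTensor(inputInfo, inputData.data())} };
/// armnn::OutputTensors outputTensors{ {0, armnn::Tensor(outputInfo, outputData.data())} };
///
/// armnn::Status status = runtime->EnqueueWorkload(networkId, inputTensors, outputTensors);
/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~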

/// The following API is replaced by the backend options API.
using IGpuAccTunedParametersPtr = std::shared_ptr<IGpuAccTunedParameters>;

/// Manages a set of GpuAcc parameters which have been tuned for maximum performance.
/// Pass an instance of this object to the IRuntime::Create() method (via IRuntime::CreationOptions) to use it
/// for all GPU workload execution.
///
/// Can be created in two modes:
/// - In UseTunedParameters mode, the parameters stored in this object are used to execute GPU workloads.
/// - In UpdateTunedParameters mode, additionally, whenever a GPU workload is executed for the first time, the
///   optimum parameters will be found and stored in this object. WARNING - This tuning can be slow.
///
/// The parameters can be loaded from and saved to a file so that you can first run a slow initial read-write
/// execution, save the parameters for later and then run fast read-only executions using the optimised parameters.
class IGpuAccTunedParameters
{
public:
    enum class Mode
    {
        UseTunedParameters,
        UpdateTunedParameters
    };

    enum class TuningLevel
    {
        Rapid = 1,
        Normal = 2,
        Exhaustive = 3
    };

    /// Creates an IGpuAccTunedParameters with the given mode and tuning level.
    /// @{
    static IGpuAccTunedParameters* CreateRaw(Mode mode, TuningLevel tunerMode);
    static IGpuAccTunedParametersPtr Create(Mode mode, TuningLevel tunerMode);
    /// @}
    static void Destroy(IGpuAccTunedParameters* params);

    /// Loads an existing set of tuned parameters from the given file.
    /// If there is an error loading the file, an armnn::Exception is thrown.
    virtual void Load(const char* filename) = 0;

    /// Saves the current set of tuned parameters to the given file.
    /// If there is an error saving to the file, an armnn::Exception is thrown.
    virtual void Save(const char* filename) const = 0;

protected:
    virtual ~IGpuAccTunedParameters() {}
};

} // namespace armnn
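
/// Example: a minimal sketch of the (deprecated) tuned-parameters workflow described
/// above. The file path is an illustrative assumption; for new code prefer the
/// "TuningLevel"/"TuningFile" backend options shown in IRuntime::CreationOptions.
/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
/// // First run: find and record optimum parameters (tuning can be slow).
/// armnn::IGpuAccTunedParametersPtr tunedParams = armnn::IGpuAccTunedParameters::Create(
///     armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters,
///     armnn::IGpuAccTunedParameters::TuningLevel::Normal);
///
/// armnn::IRuntime::CreationOptions options;
/// options.m_GpuAccTunedParameters = tunedParams;
/// // ... create the runtime and execute representative GPU workloads ...
/// tunedParams->Save("/tmp/gpu_tuned_params.bin");
///
/// // Later runs: load the saved parameters and execute in read-only mode.
/// tunedParams->Load("/tmp/gpu_tuned_params.bin");
/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~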