xref: /aosp_15_r20/external/armnn/include/armnn/IRuntime.hpp (revision 89c4ff92f2867872bb9e2354d150bf0c8c502810)
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 #pragma once
6 
7 #include "BackendOptions.hpp"
8 #include "INetwork.hpp"
9 #include "IProfiler.hpp"
10 #include "IWorkingMemHandle.hpp"
11 #include "IAsyncExecutionCallback.hpp"
12 #include "Tensor.hpp"
13 #include "Types.hpp"
14 #include "TypesUtils.hpp"
15 
16 #include <armnn/backends/ICustomAllocator.hpp>
17 #include <armnn/backends/IMemoryOptimizerStrategy.hpp>
18 
19 #include <memory>
20 #include <map>
21 
namespace arm
{

namespace pipe
{
// Forward declaration only: the full ILocalPacketHandler definition lives in the
// profiling pipe library. Declared here so CreationOptions can hold handler pointers
// without pulling in the profiling headers.
class ILocalPacketHandler;
using ILocalPacketHandlerSharedPtr = std::shared_ptr<ILocalPacketHandler>;
} // namespace pipe
} // namespace arm
31 
32 namespace armnn
33 {
34 
/// Unique handle identifying a network loaded into an IRuntime instance.
/// Produced by IRuntime::LoadNetwork and consumed by all per-network calls.
using NetworkId = int;

class IGpuAccTunedParameters;

struct RuntimeImpl;
class IRuntime;
/// Owning pointer for IRuntime. The custom function-pointer deleter routes
/// destruction through IRuntime::Destroy so the object is deleted by the library
/// that created it (safe across shared-library boundaries).
using IRuntimePtr = std::unique_ptr<IRuntime, void(*)(IRuntime* runtime)>;
42 
43 struct INetworkProperties
44 {
INetworkPropertiesarmnn::INetworkProperties45     INetworkProperties(bool asyncEnabled,
46                        MemorySource inputSource,
47                        MemorySource outputSource,
48                        bool profilingEnabled = false,
49                        ProfilingDetailsMethod detailsMethod = ProfilingDetailsMethod::Undefined,
50                        bool externalMemoryManagementEnabled = false)
51         : m_ImportEnabled(inputSource != MemorySource::Undefined),
52           m_ExportEnabled(outputSource != MemorySource::Undefined),
53           m_AsyncEnabled(asyncEnabled),
54           m_ProfilingEnabled(profilingEnabled),
55           m_OutputNetworkDetailsMethod(detailsMethod),
56           m_InputSource(inputSource),
57           m_OutputSource(outputSource),
58           m_ExternalMemoryManagementEnabled(externalMemoryManagementEnabled)
59     {}
60 
61     /// Deprecated and will be removed in future release.
62     const bool m_ImportEnabled;
63     /// Deprecated and will be removed in future release.
64     const bool m_ExportEnabled;
65 
66     const bool m_AsyncEnabled;
67 
68     const bool m_ProfilingEnabled;
69 
70     const ProfilingDetailsMethod m_OutputNetworkDetailsMethod;
71 
72     const MemorySource m_InputSource;
73     const MemorySource m_OutputSource;
74 
75     const bool m_ExternalMemoryManagementEnabled;
76 
~INetworkPropertiesarmnn::INetworkProperties77     virtual ~INetworkProperties() {}
78 };
79 
80 using namespace armnn::experimental;
81 
82 class IRuntime
83 {
84 public:
85     struct CreationOptions
86     {
CreationOptionsarmnn::IRuntime::CreationOptions87         CreationOptions()
88             : m_GpuAccTunedParameters(nullptr)
89             , m_EnableGpuProfiling(false)
90             , m_DynamicBackendsPath("")
91             , m_ProtectedMode(false)
92             , m_CustomAllocatorMap()
93             , m_MemoryOptimizerStrategyMap()
94         {}
95 
96         /// If set, uses the GpuAcc tuned parameters from the given object when executing GPU workloads.
97         /// It will also be updated with new tuned parameters if it is configured to do so.
98         std::shared_ptr<IGpuAccTunedParameters> m_GpuAccTunedParameters;
99 
100         /// Setting this flag will allow the user to obtain GPU profiling information from the runtime.
101         bool m_EnableGpuProfiling;
102 
103         /// Setting this value will override the paths set by the DYNAMIC_BACKEND_PATHS compiler directive
104         /// Only a single path is allowed for the override
105         /// It defines the path to search for any [dynamic backend libraries](src/dynamic/README.md).
106         std::string m_DynamicBackendsPath;
107 
108         /// Setting this flag will allow the user to create the Runtime in protected mode.
109         /// It will run all the inferences on protected memory and will make sure that
110         /// INetworkProperties::m_ImportEnabled set to true with MemorySource::DmaBufProtected option
111         /// This requires that the backend supports Protected Memory and has an allocator capable of
112         /// allocating Protected Memory associated with it.
113         bool m_ProtectedMode;
114 
115         /// @brief A map to define a custom memory allocator for specific backend Ids.
116         ///
117         /// @details  A Custom Allocator is used for allocation of working memory in the backends.
118         /// Set this if you need to take control of how memory is allocated on a backend. Required for
119         /// Protected Mode in order to correctly allocate Protected Memory
120         ///
121         /// @note Only supported for GpuAcc
122         std::map<BackendId, std::shared_ptr<ICustomAllocator>> m_CustomAllocatorMap;
123 
124         /// @brief A map to define a custom memory optimizer strategy for specific backend Ids.
125         ///
126         /// @details  A Memory Optimizer Strategy provides a solution to an abstract representation of
127         /// a network's memory requirements. This can also be used to return a pre-computed solution
128         /// for a specific network. Set this if you want to implement a Custom Memory Optimizer Strategy
129         /// for a given backend.
130         std::map<BackendId, std::shared_ptr<IMemoryOptimizerStrategy>> m_MemoryOptimizerStrategyMap;
131 
132         struct ExternalProfilingOptions
133         {
ExternalProfilingOptionsarmnn::IRuntime::CreationOptions::ExternalProfilingOptions134             ExternalProfilingOptions()
135                 : m_EnableProfiling(false)
136                 , m_TimelineEnabled(false)
137                 , m_OutgoingCaptureFile("")
138                 , m_IncomingCaptureFile("")
139                 , m_FileOnly(false)
140                 , m_CapturePeriod(LOWEST_CAPTURE_PERIOD)
141                 , m_FileFormat("binary")
142                 , m_LocalPacketHandlers()
143             {}
144 
145             /// Indicates whether external profiling is enabled or not.
146             bool        m_EnableProfiling;
147             /// Indicates whether external timeline profiling is enabled or not.
148             bool        m_TimelineEnabled;
149             /// Path to a file in which outgoing timeline profiling messages will be stored.
150             std::string m_OutgoingCaptureFile;
151             /// Path to a file in which incoming timeline profiling messages will be stored.
152             std::string m_IncomingCaptureFile;
153             /// Enable profiling output to file only.
154             bool        m_FileOnly;
155             /// The duration at which captured profiling messages will be flushed.
156             uint32_t    m_CapturePeriod;
157             /// The format of the file used for outputting profiling data.
158             std::string m_FileFormat;
159             std::vector<arm::pipe::ILocalPacketHandlerSharedPtr> m_LocalPacketHandlers;
160         };
161         ExternalProfilingOptions m_ProfilingOptions;
162 
163         /// Pass backend specific options.
164         ///
165         /// For example, to enable GpuAcc tuning add the following
166         /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
167         /// m_BackendOption.emplace_back(
168         ///     BackendOptions{"GpuAcc",
169         ///       {
170         ///         {"TuningLevel", 2},
171         ///         {"TuningFile", filename}
172         ///         {"MemoryOptimizerStrategy", strategyname}
173         ///       }
174         ///     });
175         /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
176         /// Execute representative workloads through the runtime to generate tuning data.
177         /// The tuning file is written once the runtime is destroyed
178 
179         /// To execute with the tuning data, start up with just the tuning file specified.
180         /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
181         /// m_BackendOption.emplace_back(
182         ///     BackendOptions{"GpuAcc",
183         ///       {
184         ///         {"TuningFile", filename}
185         ///       }
186         ///     });
187         /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
188 
189         /// The following backend options are available:
190         /// AllBackends:
191         ///   "MemoryOptimizerStrategy" : string [stategynameString]
192         ///    (Existing Memory Optimizer Strategies: ConstantMemoryStrategy)
193         /// GpuAcc:
194         ///   "TuningLevel" : int [0..3] (0=UseOnly(default) | 1=RapidTuning | 2=NormalTuning | 3=ExhaustiveTuning)
195         ///   "TuningFile" : string [filenameString]
196         ///   "KernelProfilingEnabled" : bool [true | false]
197         std::vector<BackendOptions> m_BackendOptions;
198     };
199 
200     static IRuntime* CreateRaw(const CreationOptions& options);
201     static IRuntimePtr Create(const CreationOptions& options);
202     static void Destroy(IRuntime* runtime);
203 
204     /// Loads a complete network into the IRuntime.
205     /// @param [out] networkIdOut - Unique identifier for the network is returned in this reference.
206     /// @param [in] network - Complete network to load into the IRuntime.
207     /// The runtime takes ownership of the network once passed in.
208     /// @return armnn::Status
209     Status LoadNetwork(NetworkId& networkIdOut, IOptimizedNetworkPtr network);
210 
211     /// Load a complete network into the IRuntime.
212     /// @param [out] networkIdOut Unique identifier for the network is returned in this reference.
213     /// @param [in] network Complete network to load into the IRuntime.
214     /// @param [out] errorMessage Error message if there were any errors.
215     /// The runtime takes ownership of the network once passed in.
216     /// @return armnn::Status
217     Status LoadNetwork(NetworkId& networkIdOut,
218                        IOptimizedNetworkPtr network,
219                        std::string& errorMessage);
220 
221     Status LoadNetwork(NetworkId& networkIdOut,
222                        IOptimizedNetworkPtr network,
223                        std::string& errorMessage,
224                        const INetworkProperties& networkProperties);
225 
226     TensorInfo GetInputTensorInfo(NetworkId networkId, LayerBindingId layerId) const;
227     TensorInfo GetOutputTensorInfo(NetworkId networkId, LayerBindingId layerId) const;
228 
229     /// ImportInputs separates the importing and mapping of InputTensors from network execution.
230     /// Allowing for a set of InputTensors to be imported and mapped once, but used in execution many times.
231     /// This function is not thread safe and must not be used while other threads are calling Execute().
232     /// No exceptions are thrown for failed imports. It is the caller's responsibility to check whether
233     /// tensors have been successfully imported by comparing returned ids with those passed in the InputTensors.
234     /// Whether a tensor can be imported or not is backend specific.
235     std::vector<ImportedInputId> ImportInputs(NetworkId networkId, const InputTensors& inputTensors,
236                                               MemorySource forceImportMemorySource = MemorySource::Undefined);
237 
238     /// ImportOutputs separates the importing and mapping of OutputTensors from network execution.
239     /// Allowing for a set of OutputTensors to be imported and mapped once, but used in execution many times.
240     /// This function is not thread safe and must not be used while other threads are calling Execute().
241     /// No exceptions are thrown for failed imports. It is the caller's responsibility to check whether
242     /// tensors have been successfully imported by comparing returned ids with those passed in the OutputTensors.
243     /// Whether a tensor can be imported or not is backend specific.
244     std::vector<ImportedOutputId> ImportOutputs(NetworkId networkId, const OutputTensors& outputTensors,
245                                                 MemorySource forceImportMemorySource = MemorySource::Undefined);
246 
247     /// Un-import and delete the imported InputTensor/s
248     /// This function is not thread safe and must not be used while other threads are calling Execute().
249     /// Only compatible with AsyncEnabled networks
250     void ClearImportedInputs(NetworkId networkId, const std::vector<ImportedInputId> inputIds);
251 
252     /// Un-import and delete the imported OutputTensor/s
253     /// This function is not thread safe and must not be used while other threads are calling Execute().
254     /// Only compatible with AsyncEnabled networks
255     void ClearImportedOutputs(NetworkId networkId, const std::vector<ImportedOutputId> outputIds);
256 
257     /// Evaluates a network using input in inputTensors and outputs filled into outputTensors
258     Status EnqueueWorkload(NetworkId networkId,
259                            const InputTensors& inputTensors,
260                            const OutputTensors& outputTensors,
261                            std::vector<ImportedInputId> preImportedInputIds = {},
262                            std::vector<ImportedOutputId> preImportedOutputIds = {});
263 
264     /// This is an experimental function.
265     /// Evaluates a network using input in inputTensors and outputs filled into outputTensors.
266     /// This function performs a thread safe execution of the network. Returns once execution is complete.
267     /// Will block until this and any other thread using the same workingMem object completes.
268     Status Execute(IWorkingMemHandle& workingMemHandle,
269                    const InputTensors& inputTensors,
270                    const OutputTensors& outputTensors,
271                    std::vector<ImportedInputId> preImportedInputs = {},
272                    std::vector<ImportedOutputId> preImportedOutputs = {});
273 
274     /// Unloads a network from the IRuntime.
275     /// At the moment this only removes the network from the m_Impl->m_Network.
276     /// This might need more work in the future to be AndroidNN compliant.
277     /// @param [in] networkId - Unique identifier for the network to be unloaded. Generated in LoadNetwork().
278     /// @return armnn::Status
279     Status UnloadNetwork(NetworkId networkId);
280 
281     const IDeviceSpec& GetDeviceSpec() const;
282 
283     /// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have
284     /// overlapped Execution by calling this function from different threads.
285     std::unique_ptr<IWorkingMemHandle> CreateWorkingMemHandle(NetworkId networkId);
286 
287     /// Gets the profiler corresponding to the given network id.
288     /// @param networkId The id of the network for which to get the profile.
289     /// @return A pointer to the requested profiler, or nullptr if not found.
290     const std::shared_ptr<IProfiler> GetProfiler(NetworkId networkId) const;
291 
292     /// Registers a callback function to debug layers performing custom computations on intermediate tensors.
293     /// @param networkId The id of the network to register the callback.
294     /// @param func callback function to pass to the debug layer.
295     void RegisterDebugCallback(NetworkId networkId, const DebugCallbackFunction& func);
296 
297 protected:
298     IRuntime();
299     IRuntime(const IRuntime::CreationOptions& options);
300     ~IRuntime();
301 
302     std::unique_ptr<RuntimeImpl> pRuntimeImpl;
303 };
304 
305 
/// The following API is replaced by the backend options API.
class IGpuAccTunedParameters;
using IGpuAccTunedParametersPtr = std::shared_ptr<IGpuAccTunedParameters>;

/// Manages a set of GpuAcc parameters which have been tuned for maximum performance.
/// Passes an instance of this object to the IRuntime::Create() method (via IRuntime::CreationOptions) to use it
/// for all GPU workload execution.
///
/// Can be created in two modes:
///     - In UseTunedParameters mode, the parameters stored in this object are used to execute GPU workloads.
///     - In UpdateTunedParameters mode, additionally, whenever a GPU workload is executed for the first time, the
///       optimum parameters will be found and stored in this object. WARNING - This tuning can be slow.
///
/// The parameters can be loaded from and saved to a file so that you can first run a slow initial read-write
/// execution, save the parameters for later and then run fast read-only executions using the optimised parameters.
class IGpuAccTunedParameters
{
public:
    enum class Mode
    {
        UseTunedParameters,
        UpdateTunedParameters
    };

    enum class TuningLevel
    {
        Rapid = 1,
        Normal = 2,
        Exhaustive = 3
    };

    /// Creates an IClTunedParameters with the given mode.
    /// @{
    static IGpuAccTunedParameters* CreateRaw(Mode mode, TuningLevel tunerMode);
    static IGpuAccTunedParametersPtr Create(Mode mode, TuningLevel tunerMode);
    /// @}
    static void Destroy(IGpuAccTunedParameters* params);

    /// Loads an existing set of tuned parameters from the given file.
    /// If there is an error loading the file, an armnn::Exception is thrown.
    virtual void Load(const char* filename) = 0;

    /// Saves the current set of tuned parameters to the given file.
    /// If there is an error saving to the file, an armnn::Exception is thrown.
    virtual void Save(const char* filename) const = 0;

protected:
    virtual ~IGpuAccTunedParameters() {}
};
354 
355 } // namespace armnn
356