xref: /aosp_15_r20/external/armnn/src/armnn/LoadedNetwork.hpp (revision 89c4ff92f2867872bb9e2354d150bf0c8c502810)
1 //
2 // Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 #pragma once
6 
7 #include "Network.hpp"
8 #include "LayerFwd.hpp"
9 #include "Profiling.hpp"
10 
11 #include <armnn/Tensor.hpp>
12 
13 #include <armnn/backends/IBackendInternal.hpp>
14 #include <armnn/backends/IMemoryOptimizerStrategy.hpp>
15 #include <armnn/backends/Workload.hpp>
16 #include <armnn/backends/WorkloadFactory.hpp>
17 
18 #include <backendsCommon/DefaultAllocator.hpp>
19 #include <backendsCommon/MemoryManager.hpp>
20 #include <backendsCommon/TensorHandleFactoryRegistry.hpp>
21 #include <backendsCommon/memoryOptimizerStrategyLibrary/strategies/SingleAxisPriorityList.hpp>
22 
23 #include <client/include/IProfilingService.hpp>
24 #include <client/include/TimelineUtilityMethods.hpp>
25 
26 #include <common/include/LabelsAndEventClasses.hpp>
27 
28 #include <mutex>
29 #include <condition_variable>
30 #include <unordered_map>
31 
// Forward declarations only: the three cl:: types are named here without
// pulling in their defining header. None of them is referenced elsewhere in
// the part of this header that is visible here.
// NOTE(review): presumably these are the OpenCL C++ wrapper classes - confirm.
namespace cl
{
class CommandQueue;
class Context;
class Device;
} // namespace cl
38 
39 namespace armnn
40 {
41 
42 class LoadedNetwork
43 {
44 public:
45     using WorkloadQueue = std::vector<std::unique_ptr<IWorkload>>;
46 
~LoadedNetwork()47     ~LoadedNetwork()
48     {
49         FreeWorkingMemory();
50     }
51 
52     /// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have
53     /// overlapped Execution by calling this function from different threads.
54     std::unique_ptr<IWorkingMemHandle> CreateWorkingMemHandle(NetworkId networkId);
55 
56     TensorInfo GetInputTensorInfo(LayerBindingId layerId) const;
57     TensorInfo GetOutputTensorInfo(LayerBindingId layerId) const;
58 
59     std::vector<ImportedInputId> ImportInputs(const InputTensors& inputTensors,
60                                               MemorySource forceImportMemorySource = MemorySource::Undefined);
61     std::vector<ImportedOutputId> ImportOutputs(const OutputTensors& outputTensors,
62                                                 MemorySource forceImportMemorySource = MemorySource::Undefined);
63 
64     void ClearImportedInputs(const std::vector<ImportedInputId> inputIds);
65     void ClearImportedOutputs(const std::vector<ImportedOutputId> outputIds);
66 
67     /// Single thread execution of the loaded network
68     Status EnqueueWorkload(const InputTensors& inputTensors, const OutputTensors& outputTensors,
69                            std::vector<ImportedInputId> preImportedInputIds = {},
70                            std::vector<ImportedOutputId> preImportedOutputIds = {});
71 
72     /// Thread safe execution of the loaded network
73     Status Execute(const InputTensors& inputTensors,
74                    const OutputTensors& outputTensors,
75                    IWorkingMemHandle& workingMemHandle,
76                    std::vector<ImportedInputId> preImportedInputs = {},
77                    std::vector<ImportedOutputId> preImportedOutputs = {});
78 
79     static std::unique_ptr<LoadedNetwork> MakeLoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
80                                                             std::string& errorMessage,
81                                                             const INetworkProperties& networkProperties,
82                                                             arm::pipe::IProfilingService* profilingService);
83 
84     // NOTE we return by reference as the purpose of this method is only to provide
85     // access to the private m_Profiler and in theory we should not need to increment
86     // the shared_ptr's reference counter
GetProfiler() const87     const std::shared_ptr<IProfiler>& GetProfiler() const { return m_OptimizedNetwork->GetProfiler(); }
88 
89     void FreeWorkingMemory();
90 
91     void RegisterDebugCallback(const DebugCallbackFunction& func);
92 
93     void SendNetworkStructure(arm::pipe::IProfilingService& profilingService);
94 
IsAsyncEnabled()95     bool IsAsyncEnabled()
96     {
97         return m_NetworkProperties.m_AsyncEnabled;
98     }
99 
100     arm::pipe::ProfilingGuid GetNetworkGuid();
101 
102 private:
103 
104 
105     void AllocateWorkingMemory(
106 #if !defined(ARMNN_DISABLE_THREADS)
107         std::lock_guard<std::mutex>& lock
108 #endif
109     );
110     void AllocateAndExecuteConstantWorkloads();
111     void AllocateAndExecuteConstantWorkloadsAsync();
112 
113     std::unordered_map<LayerGuid, std::unique_ptr<IWorkload>> m_ConstantWorkloads;
114     std::unordered_map<LayerGuid, ITensorHandle*> m_ConstantTensorHandles;
115 
116     std::unique_ptr<IMemoryOptimizerStrategy> m_ConstantStrategy = std::make_unique<SingleAxisPriorityList>();
117 
118     LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
119                   const INetworkProperties& networkProperties,
120                   arm::pipe::IProfilingService* profilingService);
121 
122     void EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo);
123 
124     void EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo);
125 
126     void EnqueueInput(const ConstTensor& inputTensor, ITensorHandle* inputTensorHandle);
127 
128     void ImportOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle);
129 
130     bool Execute(std::unique_ptr<arm::pipe::TimelineUtilityMethods>& timelineUtils,
131                  arm::pipe::ProfilingGuid inferenceGuid);
132 
133     const IWorkloadFactory& GetWorkloadFactory(const Layer& layer) const;
134 
135     inline LayerBindingId ValidateImportedInputID(ImportedInputId id);
136     inline LayerBindingId ValidateImportedOutputID(ImportedOutputId id);
137 
138     void CreateMemoryProfile();
139     void CreateMemoryProfileAsync();
140 
141     std::unique_ptr<MemoryManager> CreateExternalMemoryManger(
142             std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>>& tensorMemory);
143 
144     using BackendPtrMap = std::unordered_map<BackendId, IBackendInternalUniquePtr>;
145 
146     BackendPtrMap  m_Backends;
147     std::vector<IBackendInternal::IMemoryManagerSharedPtr> m_BackendMemoryMangers;
148 
149     using WorkloadFactoryMap = std::unordered_map<BackendId, IBackendInternal::IWorkloadFactoryPtr>;
150     WorkloadFactoryMap  m_WorkloadFactories;
151 
152     std::unique_ptr<IOptimizedNetwork> m_OptimizedNetwork;
153 
154     WorkloadQueue                      m_InputQueue;
155     WorkloadQueue                      m_WorkloadQueue;
156     WorkloadQueue                      m_OutputQueue;
157 
158 #if !defined(ARMNN_DISABLE_THREADS)
159     mutable std::mutex m_WorkingMemMutex;
160 #endif
161 
162     bool m_IsWorkingMemAllocated = false;
163 
164     INetworkProperties m_NetworkProperties;
165 
166     TensorHandleFactoryRegistry m_TensorHandleFactoryRegistry;
167 
168     // NOTE: raw pointer because the profiling service is controlled by the Runtime
169     arm::pipe::IProfilingService* m_ProfilingService;
170 
171     struct ImportedTensorHandlePin
172     {
ImportedTensorHandlePinarmnn::LoadedNetwork::ImportedTensorHandlePin173         ImportedTensorHandlePin()
174         {}
175 
ImportedTensorHandlePinarmnn::LoadedNetwork::ImportedTensorHandlePin176         ImportedTensorHandlePin(LayerBindingId layerBindingId,
177                                 std::unique_ptr<ITensorHandle> tensorHandle)
178         : m_LayerBindingId(layerBindingId)
179         , m_TensorHandle(std::move(tensorHandle))
180         {}
181 
182         ImportedTensorHandlePin(ImportedTensorHandlePin&&) = default;
183 
~ImportedTensorHandlePinarmnn::LoadedNetwork::ImportedTensorHandlePin184         ~ImportedTensorHandlePin()
185         {
186             if (m_TensorHandle)
187             {
188                 m_TensorHandle->Unimport();
189             }
190         }
191 
192         LayerBindingId m_LayerBindingId;
193         std::unique_ptr<ITensorHandle> m_TensorHandle;
194     };
195 
196     std::vector<ImportedTensorHandlePin> m_PreImportedInputHandles;
197     std::vector<ImportedTensorHandlePin> m_PreImportedOutputHandles;
198 
199     ImportedInputId m_CurImportedInputId = 0;
200     ImportedInputId m_CurImportedOutputId = 0;
201 
202     std::unordered_map<BackendId, std::vector<MemBlock>> m_MemBlockMap;
203     std::unordered_map<BackendId, std::vector<MemBin>> m_MemBinMap;
204 
205     std::vector<ITensorHandle*> m_Tensorhandles;
206 
207     std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>> m_TensorMemory;
208 
209     std::unique_ptr<MemoryManager> m_ExternalMemoryManager;
210 
211     std::unordered_map<BackendId, bool> m_SupportsExternallyManagedMemory;
212 
213     // A set of vectors to record the workload queue indexes and their corresponding Input/Output Slot indexes
214     // which are connected to Inputs and Outputs for the network.
215     struct WorkloadIndices
216     {
217         unsigned int m_WorkloadIndex;
218         unsigned int m_SlotIndex;
219     };
220 
221     struct OutputWorkloadIndices
222     {
223         WorkloadIndices m_OutputSlotIndices;
224         std::vector<WorkloadIndices> m_InputSlotIndices;
225     };
226     std::unordered_map<LayerBindingId, std::vector<WorkloadIndices>> m_InputWorkloadSlotPairs;
227     std::unordered_map<LayerBindingId, OutputWorkloadIndices> m_OutputWorkloadSlotPairs;
228     std::vector<bool> m_IsInputImported;
229     std::vector<bool> m_IsOutputImported;
230 
231 };
232 
233 }
234