xref: /aosp_15_r20/external/armnn/src/backends/cl/ClBackend.hpp (revision 89c4ff92f2867872bb9e2354d150bf0c8c502810)
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 #pragma once
6 
7 #include <armnn/backends/IBackendInternal.hpp>
8 
9 #include <arm_compute/core/Types.h>
10 #include <arm_compute/runtime/CL/CLBufferAllocator.h>
11 
12 #include <aclCommon/BaseMemoryManager.hpp>
13 #include <arm_compute/runtime/CL/CLMemoryRegion.h>
14 
15 #include <arm_compute/core/CL/CLKernelLibrary.h>
16 #include <CL/cl_ext.h>
17 
18 // System includes for mapping and unmapping memory
19 #include <sys/mman.h>
20 
21 namespace armnn
22 {
23 
24 // add new capabilities here..
25 const BackendCapabilities gpuAccCapabilities("GpuAcc",
26                                              {
27                                                      {"NonConstWeights", false},
28                                                      {"AsyncExecution", false},
29                                                      {"ProtectedContentAllocation", true},
30                                                      {"ConstantTensorsAsInputs", true},
31                                                      {"PreImportIOTensors", false},
32                                                      {"ExternallyManagedMemory", true},
33                                                      {"MultiAxisPacking", false},
34                                                      {"SingleAxisPacking", true}
35                                              });
36 
37 class ClBackend : public IBackendInternal
38 {
39 public:
ClBackend()40     ClBackend() : m_CustomAllocator(nullptr) {};
ClBackend(std::shared_ptr<ICustomAllocator> allocator)41     ClBackend(std::shared_ptr<ICustomAllocator> allocator)
42     {
43         std::string err;
44         UseCustomMemoryAllocator(allocator, err);
45     }
46     ~ClBackend() = default;
47 
48     static const BackendId& GetIdStatic();
GetId() const49     const BackendId& GetId() const override { return GetIdStatic(); }
50 
51     IBackendInternal::IMemoryManagerUniquePtr CreateMemoryManager() const override;
52 
53     IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(
54         const IBackendInternal::IMemoryManagerSharedPtr& memoryManager = nullptr) const override;
55 
56     IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(
57         TensorHandleFactoryRegistry& registry) const override;
58 
59     IWorkloadFactoryPtr CreateWorkloadFactory(const IMemoryManagerSharedPtr& memoryManager,
60                                               const ModelOptions& modelOptions) const override;
61 
62     IWorkloadFactoryPtr CreateWorkloadFactory(class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry,
63                                               const ModelOptions& modelOptions) const override;
64 
65     IWorkloadFactoryPtr CreateWorkloadFactory(class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry,
66                                               const ModelOptions& modelOptions,
67                                               MemorySourceFlags inputFlags,
68                                               MemorySourceFlags outputFlags) const override;
69 
70     std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override;
71 
72     void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) override;
73 
74     void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry,
75                                        MemorySourceFlags inputFlags,
76                                        MemorySourceFlags outputFlags) override;
77 
78     IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const override;
79     IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext(
80         const IRuntime::CreationOptions&, IBackendProfilingPtr& backendProfiling) override;
81 
82     IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override;
83     IBackendInternal::ILayerSupportSharedPtr GetLayerSupport(const ModelOptions& modelOptions) const override;
84 
85     OptimizationViews OptimizeSubgraphView(const SubgraphView& subgraph,
86                                            const ModelOptions& modelOptions) const override;
87 
88     IBackendInternal::IBackendSpecificModelContextPtr CreateBackendSpecificModelContext(
89         const ModelOptions& modelOptions) const override;
90 
91     std::unique_ptr<ICustomAllocator> GetDefaultAllocator() const override;
92 
GetCapabilities() const93     BackendCapabilities GetCapabilities() const override
94     {
95         return gpuAccCapabilities;
96     };
97 
UseCustomMemoryAllocator(std::shared_ptr<ICustomAllocator> allocator,armnn::Optional<std::string &> errMsg)98     virtual bool UseCustomMemoryAllocator(std::shared_ptr<ICustomAllocator> allocator,
99                                           armnn::Optional<std::string&> errMsg) override
100     {
101         IgnoreUnused(errMsg);
102         ARMNN_LOG(info) << "Using Custom Allocator for ClBackend";
103 
104         // Set flag to signal the backend to use a custom memory allocator
105         m_CustomAllocator = std::make_shared<ClBackendCustomAllocatorWrapper>(std::move(allocator));
106         m_UsingCustomAllocator = true;
107         return m_UsingCustomAllocator;
108     }
109 
GetNumberOfCacheFiles() const110     virtual unsigned int GetNumberOfCacheFiles() const override { return 1; }
111 
112     // Cl requires a arm_compute::IAllocator we wrap the Arm NN ICustomAllocator to achieve this
113     class ClBackendCustomAllocatorWrapper : public arm_compute::IAllocator
114     {
115     public:
ClBackendCustomAllocatorWrapper(std::shared_ptr<ICustomAllocator> alloc)116         ClBackendCustomAllocatorWrapper(std::shared_ptr<ICustomAllocator> alloc) : m_CustomAllocator(alloc)
117         {}
118         // Inherited methods overridden:
allocate(size_t size,size_t alignment)119         void* allocate(size_t size, size_t alignment) override
120         {
121             auto alloc = m_CustomAllocator->allocate(size, alignment);
122             return MapAllocatedMemory(alloc, size, m_CustomAllocator->GetMemorySourceType());
123         }
free(void * ptr)124         void free(void* ptr) override
125         {
126             auto hostMemPtr = m_AllocatedBufferMappings[ptr];
127             clReleaseMemObject(static_cast<cl_mem>(ptr));
128             m_CustomAllocator->free(hostMemPtr);
129         }
make_region(size_t size,size_t alignment)130         std::unique_ptr<arm_compute::IMemoryRegion> make_region(size_t size, size_t alignment) override
131         {
132             auto hostMemPtr = m_CustomAllocator->allocate(size, alignment);
133             cl_mem buffer = MapAllocatedMemory(hostMemPtr, size, m_CustomAllocator->GetMemorySourceType());
134 
135             return std::make_unique<ClBackendCustomAllocatorMemoryRegion>(cl::Buffer(buffer),
136                                                                           hostMemPtr,
137                                                                           m_CustomAllocator->GetMemorySourceType());
138         }
139     private:
MapAllocatedMemory(void * memory,size_t size,MemorySource source)140         cl_mem MapAllocatedMemory(void* memory, size_t size, MemorySource source)
141         {
142             // Round the size of the buffer to a multiple of the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE
143             auto cachelineAlignment =
144                     arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
145             auto roundedSize = cachelineAlignment + size - (size % cachelineAlignment);
146 
147             if (source == MemorySource::Malloc)
148             {
149                 const cl_import_properties_arm importProperties[] =
150                         {
151                             CL_IMPORT_TYPE_ARM,
152                             CL_IMPORT_TYPE_HOST_ARM,
153                             0
154                         };
155                 cl_int error = CL_SUCCESS;
156                 cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
157                                                   CL_MEM_READ_WRITE,
158                                                   importProperties,
159                                                   memory,
160                                                   roundedSize,
161                                                   &error);
162                 if (error == CL_SUCCESS)
163                 {
164                     m_AllocatedBufferMappings.insert(std::make_pair(static_cast<void *>(buffer), memory));
165                     return buffer;
166                 }
167                 throw armnn::Exception(
168                     "Mapping allocated memory from CustomMemoryAllocator failed, errcode: " + std::to_string(error));
169             }
170             else if (source == MemorySource::DmaBuf)
171             {
172                 const cl_import_properties_arm importProperties[] =
173                         {
174                             CL_IMPORT_TYPE_ARM,
175                             CL_IMPORT_TYPE_DMA_BUF_ARM,
176                             CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM,
177                             CL_TRUE,
178                             0
179                         };
180                 cl_int error = CL_SUCCESS;
181                 cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
182                                                   CL_MEM_READ_WRITE,
183                                                   importProperties,
184                                                   memory,
185                                                   roundedSize,
186                                                   &error);
187                 if (error == CL_SUCCESS)
188                 {
189                     m_AllocatedBufferMappings.insert(std::make_pair(static_cast<void *>(buffer), memory));
190                     return buffer;
191                 }
192                 throw armnn::Exception(
193                         "Mapping allocated memory from CustomMemoryAllocator failed, errcode: "
194                          + std::to_string(error));
195             }
196             else if (source == MemorySource::DmaBufProtected)
197             {
198                 const cl_import_properties_arm importProperties[] =
199                         {
200                                 CL_IMPORT_TYPE_ARM,
201                                 CL_IMPORT_TYPE_DMA_BUF_ARM,
202                                 CL_IMPORT_TYPE_PROTECTED_ARM,
203                                 CL_TRUE,
204                                 0
205                         };
206                 cl_int error = CL_SUCCESS;
207                 cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
208                                                   CL_MEM_READ_WRITE,
209                                                   importProperties,
210                                                   memory,
211                                                   roundedSize,
212                                                   &error);
213                 if (error == CL_SUCCESS)
214                 {
215                     m_AllocatedBufferMappings.insert(std::make_pair(static_cast<void *>(buffer), memory));
216                     return buffer;
217                 }
218                 throw armnn::Exception(
219                         "Mapping allocated memory from CustomMemoryAllocator failed, errcode: "
220                          + std::to_string(error));
221             }
222             throw armnn::Exception(
223                     "Attempting to allocate memory with unsupported MemorySource type in CustomAllocator");
224         }
225         std::shared_ptr<ICustomAllocator> m_CustomAllocator;
226         std::map<void*, void*> m_AllocatedBufferMappings;
227     };
228 
229     class ClBackendCustomAllocatorMemoryRegion : public arm_compute::ICLMemoryRegion
230     {
231     public:
232         // We need to have a new version of ICLMemoryRegion which holds a hostMemPtr to allow for cpu copy access
ClBackendCustomAllocatorMemoryRegion(const cl::Buffer & buffer,void * hostMemPtr,armnn::MemorySource source)233         ClBackendCustomAllocatorMemoryRegion(const cl::Buffer &buffer, void* hostMemPtr, armnn::MemorySource source)
234             : ICLMemoryRegion(buffer.getInfo<CL_MEM_SIZE>())
235         {
236             _mem = buffer;
237             m_HostMemPtr = hostMemPtr;
238             m_MemorySource = source;
239         }
240 
241         // Inherited methods overridden :
ptr()242         void* ptr() override
243         {
244             return nullptr;
245         }
246 
map(cl::CommandQueue & q,bool blocking)247         void* map(cl::CommandQueue &q, bool blocking) override
248         {
249             armnn::IgnoreUnused(q, blocking);
250             if (m_HostMemPtr == nullptr)
251             {
252                 throw armnn::Exception("ClBackend: Attempting to map memory with an invalid host ptr");
253             }
254             if (_mapping != nullptr)
255             {
256                 throw armnn::Exception("ClBackend: Attempting to map memory which has not yet been unmapped");
257             }
258             switch (m_MemorySource)
259             {
260                 case armnn::MemorySource::Malloc:
261                     _mapping = m_HostMemPtr;
262                     return _mapping;
263                     break;
264                 case armnn::MemorySource::DmaBuf:
265                 case armnn::MemorySource::DmaBufProtected:
266                     // If the source is a Dmabuf then the memory ptr should be pointing to an integer value for the fd
267                     _mapping = mmap(NULL, _size, PROT_WRITE, MAP_SHARED, *(reinterpret_cast<int*>(m_HostMemPtr)), 0);
268                     return _mapping;
269                     break;
270                 default:
271                     throw armnn::Exception("ClBackend: Attempting to map imported memory without a valid source");
272                     break;
273             }
274         }
275 
unmap(cl::CommandQueue & q)276         void unmap(cl::CommandQueue &q) override
277         {
278             armnn::IgnoreUnused(q);
279             switch (m_MemorySource)
280             {
281                 case armnn::MemorySource::Malloc:
282                     _mapping = nullptr;
283                     break;
284                 case armnn::MemorySource::DmaBuf:
285                 case armnn::MemorySource::DmaBufProtected:
286                     munmap(_mapping, _size);
287                     _mapping = nullptr;
288                     break;
289                 default:
290                     throw armnn::Exception("ClBackend: Attempting to unmap imported memory without a valid source");
291                     break;
292             }
293         }
294     private:
295         void* m_HostMemPtr = nullptr;
296         armnn::MemorySource m_MemorySource;
297     };
298 
299     std::shared_ptr<ClBackendCustomAllocatorWrapper> m_CustomAllocator;
300     bool m_UsingCustomAllocator = false;
301 };
302 
303 } // namespace armnn
304