1 // 2 // Copyright © 2017 Arm Ltd. All rights reserved. 3 // SPDX-License-Identifier: MIT 4 // 5 #pragma once 6 7 #include <armnn/backends/IBackendInternal.hpp> 8 9 #include <arm_compute/core/Types.h> 10 #include <arm_compute/runtime/CL/CLBufferAllocator.h> 11 12 #include <aclCommon/BaseMemoryManager.hpp> 13 #include <arm_compute/runtime/CL/CLMemoryRegion.h> 14 15 #include <arm_compute/core/CL/CLKernelLibrary.h> 16 #include <CL/cl_ext.h> 17 18 // System includes for mapping and unmapping memory 19 #include <sys/mman.h> 20 21 namespace armnn 22 { 23 24 // add new capabilities here.. 25 const BackendCapabilities gpuAccCapabilities("GpuAcc", 26 { 27 {"NonConstWeights", false}, 28 {"AsyncExecution", false}, 29 {"ProtectedContentAllocation", true}, 30 {"ConstantTensorsAsInputs", true}, 31 {"PreImportIOTensors", false}, 32 {"ExternallyManagedMemory", true}, 33 {"MultiAxisPacking", false}, 34 {"SingleAxisPacking", true} 35 }); 36 37 class ClBackend : public IBackendInternal 38 { 39 public: ClBackend()40 ClBackend() : m_CustomAllocator(nullptr) {}; ClBackend(std::shared_ptr<ICustomAllocator> allocator)41 ClBackend(std::shared_ptr<ICustomAllocator> allocator) 42 { 43 std::string err; 44 UseCustomMemoryAllocator(allocator, err); 45 } 46 ~ClBackend() = default; 47 48 static const BackendId& GetIdStatic(); GetId() const49 const BackendId& GetId() const override { return GetIdStatic(); } 50 51 IBackendInternal::IMemoryManagerUniquePtr CreateMemoryManager() const override; 52 53 IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory( 54 const IBackendInternal::IMemoryManagerSharedPtr& memoryManager = nullptr) const override; 55 56 IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory( 57 TensorHandleFactoryRegistry& registry) const override; 58 59 IWorkloadFactoryPtr CreateWorkloadFactory(const IMemoryManagerSharedPtr& memoryManager, 60 const ModelOptions& modelOptions) const override; 61 62 IWorkloadFactoryPtr CreateWorkloadFactory(class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry, 63 const ModelOptions& modelOptions) const override; 64 65 IWorkloadFactoryPtr CreateWorkloadFactory(class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry, 66 const ModelOptions& modelOptions, 67 MemorySourceFlags inputFlags, 68 MemorySourceFlags outputFlags) const override; 69 70 std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override; 71 72 void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) override; 73 74 void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry, 75 MemorySourceFlags inputFlags, 76 MemorySourceFlags outputFlags) override; 77 78 IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const override; 79 IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext( 80 const IRuntime::CreationOptions&, IBackendProfilingPtr& backendProfiling) override; 81 82 IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override; 83 IBackendInternal::ILayerSupportSharedPtr GetLayerSupport(const ModelOptions& modelOptions) const override; 84 85 OptimizationViews OptimizeSubgraphView(const SubgraphView& subgraph, 86 const ModelOptions& modelOptions) const override; 87 88 IBackendInternal::IBackendSpecificModelContextPtr CreateBackendSpecificModelContext( 89 const ModelOptions& modelOptions) const override; 90 91 std::unique_ptr<ICustomAllocator> GetDefaultAllocator() const override; 92 GetCapabilities() const93 BackendCapabilities GetCapabilities() const override 94 { 95 return gpuAccCapabilities; 96 }; 97 UseCustomMemoryAllocator(std::shared_ptr<ICustomAllocator> allocator,armnn::Optional<std::string &> errMsg)98 virtual bool UseCustomMemoryAllocator(std::shared_ptr<ICustomAllocator> allocator, 99 armnn::Optional<std::string&> errMsg) override 100 { 101 IgnoreUnused(errMsg); 102 ARMNN_LOG(info) << "Using Custom Allocator for ClBackend"; 103 104 // Set flag to signal the backend to use a custom memory allocator 105 m_CustomAllocator = std::make_shared<ClBackendCustomAllocatorWrapper>(std::move(allocator)); 106 m_UsingCustomAllocator = true; 107 return m_UsingCustomAllocator; 108 } 109 GetNumberOfCacheFiles() const110 virtual unsigned int GetNumberOfCacheFiles() const override { return 1; } 111 112 // Cl requires a arm_compute::IAllocator we wrap the Arm NN ICustomAllocator to achieve this 113 class ClBackendCustomAllocatorWrapper : public arm_compute::IAllocator 114 { 115 public: ClBackendCustomAllocatorWrapper(std::shared_ptr<ICustomAllocator> alloc)116 ClBackendCustomAllocatorWrapper(std::shared_ptr<ICustomAllocator> alloc) : m_CustomAllocator(alloc) 117 {} 118 // Inherited methods overridden: allocate(size_t size,size_t alignment)119 void* allocate(size_t size, size_t alignment) override 120 { 121 auto alloc = m_CustomAllocator->allocate(size, alignment); 122 return MapAllocatedMemory(alloc, size, m_CustomAllocator->GetMemorySourceType()); 123 } free(void * ptr)124 void free(void* ptr) override 125 { 126 auto hostMemPtr = m_AllocatedBufferMappings[ptr]; 127 clReleaseMemObject(static_cast<cl_mem>(ptr)); 128 m_CustomAllocator->free(hostMemPtr); 129 } make_region(size_t size,size_t alignment)130 std::unique_ptr<arm_compute::IMemoryRegion> make_region(size_t size, size_t alignment) override 131 { 132 auto hostMemPtr = m_CustomAllocator->allocate(size, alignment); 133 cl_mem buffer = MapAllocatedMemory(hostMemPtr, size, m_CustomAllocator->GetMemorySourceType()); 134 135 return std::make_unique<ClBackendCustomAllocatorMemoryRegion>(cl::Buffer(buffer), 136 hostMemPtr, 137 m_CustomAllocator->GetMemorySourceType()); 138 } 139 private: MapAllocatedMemory(void * memory,size_t size,MemorySource source)140 cl_mem MapAllocatedMemory(void* memory, size_t size, MemorySource source) 141 { 142 // Round the size of the buffer to a multiple of the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 143 auto cachelineAlignment = 144 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>(); 145 auto roundedSize = cachelineAlignment + size - (size % cachelineAlignment); 146 147 if (source == MemorySource::Malloc) 148 { 149 const cl_import_properties_arm importProperties[] = 150 { 151 CL_IMPORT_TYPE_ARM, 152 CL_IMPORT_TYPE_HOST_ARM, 153 0 154 }; 155 cl_int error = CL_SUCCESS; 156 cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(), 157 CL_MEM_READ_WRITE, 158 importProperties, 159 memory, 160 roundedSize, 161 &error); 162 if (error == CL_SUCCESS) 163 { 164 m_AllocatedBufferMappings.insert(std::make_pair(static_cast<void *>(buffer), memory)); 165 return buffer; 166 } 167 throw armnn::Exception( 168 "Mapping allocated memory from CustomMemoryAllocator failed, errcode: " + std::to_string(error)); 169 } 170 else if (source == MemorySource::DmaBuf) 171 { 172 const cl_import_properties_arm importProperties[] = 173 { 174 CL_IMPORT_TYPE_ARM, 175 CL_IMPORT_TYPE_DMA_BUF_ARM, 176 CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM, 177 CL_TRUE, 178 0 179 }; 180 cl_int error = CL_SUCCESS; 181 cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(), 182 CL_MEM_READ_WRITE, 183 importProperties, 184 memory, 185 roundedSize, 186 &error); 187 if (error == CL_SUCCESS) 188 { 189 m_AllocatedBufferMappings.insert(std::make_pair(static_cast<void *>(buffer), memory)); 190 return buffer; 191 } 192 throw armnn::Exception( 193 "Mapping allocated memory from CustomMemoryAllocator failed, errcode: " 194 + std::to_string(error)); 195 } 196 else if (source == MemorySource::DmaBufProtected) 197 { 198 const cl_import_properties_arm importProperties[] = 199 { 200 CL_IMPORT_TYPE_ARM, 201 CL_IMPORT_TYPE_DMA_BUF_ARM, 202 CL_IMPORT_TYPE_PROTECTED_ARM, 203 CL_TRUE, 204 0 205 }; 206 cl_int error = CL_SUCCESS; 207 cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(), 208 CL_MEM_READ_WRITE, 209 importProperties, 210 memory, 211 roundedSize, 212 &error); 213 if (error == CL_SUCCESS) 214 { 215 m_AllocatedBufferMappings.insert(std::make_pair(static_cast<void *>(buffer), memory)); 216 return buffer; 217 } 218 throw armnn::Exception( 219 "Mapping allocated memory from CustomMemoryAllocator failed, errcode: " 220 + std::to_string(error)); 221 } 222 throw armnn::Exception( 223 "Attempting to allocate memory with unsupported MemorySource type in CustomAllocator"); 224 } 225 std::shared_ptr<ICustomAllocator> m_CustomAllocator; 226 std::map<void*, void*> m_AllocatedBufferMappings; 227 }; 228 229 class ClBackendCustomAllocatorMemoryRegion : public arm_compute::ICLMemoryRegion 230 { 231 public: 232 // We need to have a new version of ICLMemoryRegion which holds a hostMemPtr to allow for cpu copy access ClBackendCustomAllocatorMemoryRegion(const cl::Buffer & buffer,void * hostMemPtr,armnn::MemorySource source)233 ClBackendCustomAllocatorMemoryRegion(const cl::Buffer &buffer, void* hostMemPtr, armnn::MemorySource source) 234 : ICLMemoryRegion(buffer.getInfo<CL_MEM_SIZE>()) 235 { 236 _mem = buffer; 237 m_HostMemPtr = hostMemPtr; 238 m_MemorySource = source; 239 } 240 241 // Inherited methods overridden : ptr()242 void* ptr() override 243 { 244 return nullptr; 245 } 246 map(cl::CommandQueue & q,bool blocking)247 void* map(cl::CommandQueue &q, bool blocking) override 248 { 249 armnn::IgnoreUnused(q, blocking); 250 if (m_HostMemPtr == nullptr) 251 { 252 throw armnn::Exception("ClBackend: Attempting to map memory with an invalid host ptr"); 253 } 254 if (_mapping != nullptr) 255 { 256 throw armnn::Exception("ClBackend: Attempting to map memory which has not yet been unmapped"); 257 } 258 switch (m_MemorySource) 259 { 260 case armnn::MemorySource::Malloc: 261 _mapping = m_HostMemPtr; 262 return _mapping; 263 break; 264 case armnn::MemorySource::DmaBuf: 265 case armnn::MemorySource::DmaBufProtected: 266 // If the source is a Dmabuf then the memory ptr should be pointing to an integer value for the fd 267 _mapping = mmap(NULL, _size, PROT_WRITE, MAP_SHARED, *(reinterpret_cast<int*>(m_HostMemPtr)), 0); 268 return _mapping; 269 break; 270 default: 271 throw armnn::Exception("ClBackend: Attempting to map imported memory without a valid source"); 272 break; 273 } 274 } 275 unmap(cl::CommandQueue & q)276 void unmap(cl::CommandQueue &q) override 277 { 278 armnn::IgnoreUnused(q); 279 switch (m_MemorySource) 280 { 281 case armnn::MemorySource::Malloc: 282 _mapping = nullptr; 283 break; 284 case armnn::MemorySource::DmaBuf: 285 case armnn::MemorySource::DmaBufProtected: 286 munmap(_mapping, _size); 287 _mapping = nullptr; 288 break; 289 default: 290 throw armnn::Exception("ClBackend: Attempting to unmap imported memory without a valid source"); 291 break; 292 } 293 } 294 private: 295 void* m_HostMemPtr = nullptr; 296 armnn::MemorySource m_MemorySource; 297 }; 298 299 std::shared_ptr<ClBackendCustomAllocatorWrapper> m_CustomAllocator; 300 bool m_UsingCustomAllocator = false; 301 }; 302 303 } // namespace armnn 304