// executorch/backends/qualcomm/runtime/SharedBuffer.cpp
/*
 * Copyright (c) Qualcomm Innovation Center, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */
#include <dlfcn.h>
#include <executorch/backends/qualcomm/runtime/Logging.h>
#include <executorch/backends/qualcomm/runtime/SharedBuffer.h>

// Refer to the QNN HTP Shared Buffer Tutorial in the
// Qualcomm® AI Engine Direct documentation.
constexpr uint8_t RPCMEM_HEAP_ID_SYSTEM = 25;
constexpr uint8_t RPCMEM_DEFAULT_FLAGS = 1;
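// These constants are assumed to mirror RPCMEM_HEAP_ID_SYSTEM (25) and
// RPCMEM_DEFAULT_FLAGS (1, i.e. cached) from the Hexagon SDK's rpcmem.h,
// duplicated here to avoid a dependency on that header.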

std::size_t std::hash<CustomMemTensorInfo>::operator()(
    const CustomMemTensorInfo& info) const noexcept {
  size_t hash_val = 0;
  hash_val ^= std::hash<void*>()(info.tensor_addr);
  hash_val ^= std::hash<void*>()(info.custom_mem);
  hash_val ^= std::hash<size_t>()(info.pos);
  hash_val ^= std::hash<size_t>()(info.tensor_bytes);
  for (uint32_t i = 0; i < info.rank; ++i) {
    hash_val ^= info.shape[i];
  }
  hash_val ^= std::hash<uint32_t>()(info.rank);
  hash_val ^= std::hash<executorch::aten::ScalarType>()(info.dtype);
  return hash_val;
}
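// The std::hash specialization above and operator== below let
// CustomMemTensorInfo serve as a key in unordered containers, which is
// presumably how custom_mem_tensor_info_set_ stores it, e.g.:
//   std::unordered_set<CustomMemTensorInfo> infos;
//   infos.insert(info);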

bool operator==(
    const CustomMemTensorInfo& lhs,
    const CustomMemTensorInfo& rhs) {
  bool is_same =
      (lhs.tensor_addr == rhs.tensor_addr && lhs.custom_mem == rhs.custom_mem &&
       lhs.pos == rhs.pos && lhs.tensor_bytes == rhs.tensor_bytes &&
       lhs.rank == rhs.rank && lhs.dtype == rhs.dtype);
  // Shapes can only be compared element-wise once the ranks are known to
  // match; otherwise the loop could index past the end of the smaller array.
  if (!is_same) {
    return false;
  }
  for (uint32_t i = 0; i < lhs.rank; ++i) {
    is_same &= lhs.shape[i] == rhs.shape[i];
  }
  return is_same;
}

namespace executorch {
namespace backends {
namespace qnn {

using executorch::runtime::Error;

namespace {

intptr_t alignTo(size_t alignment, intptr_t offset) {
  return offset % alignment == 0 ? offset
                                 : offset +
          (static_cast<intptr_t>(alignment) -
           offset % static_cast<intptr_t>(alignment));
}
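// Rounds `offset` up to the next multiple of `alignment`; for example,
// alignTo(64, 100) == 128 while alignTo(64, 128) == 128.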

} // namespace

std::mutex SharedBuffer::init_mutex_;

void* SharedBuffer::GetCustomMemBase(void* buf) {
  auto it = tensor_addr_to_custom_mem_.find(buf);
  if (it == tensor_addr_to_custom_mem_.end()) {
    return nullptr;
  }
  return it->second;
}

void* SharedBuffer::GetUnAlignedAddr(void* buf) {
  auto it = restore_map_.find(buf);
  if (it == restore_map_.end()) {
    return nullptr;
  }
  return it->second;
}

size_t SharedBuffer::GetAllocatedSize(void* buf) {
  auto it = allocated_size_map_.find(buf);
  if (it == allocated_size_map_.end()) {
    return 0;
  }
  return it->second;
}

SharedBuffer& SharedBuffer::GetSharedBufferManager() {
  std::lock_guard<std::mutex> lk(init_mutex_);
  static SharedBuffer shared_buffer_manager;
  if (!shared_buffer_manager.GetInitialize()) {
#if defined(__aarch64__)
    Error status = shared_buffer_manager.Load();
#else
    // For the x86_64 platform, there is no rpcmem library to load.
    Error status = Error::Ok;
#endif
    if (status == Error::Ok) {
      shared_buffer_manager.SetInitialize(true);
    }
  }
  return shared_buffer_manager;
}
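// Illustrative usage sketch (names other than the SharedBuffer methods are
// hypothetical): callers obtain the singleton and route all shared-memory
// traffic through it:
//   auto& mgr = SharedBuffer::GetSharedBufferManager();
//   void* tensor_buf = mgr.AllocMem(tensor_bytes, 64);  // alignment assumed
//   int32_t fd = mgr.MemToFd(tensor_buf);
//   ...
//   mgr.FreeMem(tensor_buf);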

SharedBuffer::~SharedBuffer() {
#if defined(__aarch64__)
  if (initialize_) {
    SharedBuffer::GetSharedBufferManager().UnLoad();
  }
#endif
}

void* SharedBuffer::AllocMem(size_t bytes, size_t alignment) {
  if (!initialize_) {
    QNN_EXECUTORCH_LOG_ERROR("Shared memory not initialized.");
    return nullptr;
  }
  // Over-allocate by `alignment` bytes so an aligned address always fits
  // inside the allocation.
  auto allocate_bytes = static_cast<int32_t>(bytes + alignment);
  void* buf = rpc_mem_alloc_(
      RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, allocate_bytes);
  if (buf == nullptr) {
    QNN_EXECUTORCH_LOG_WARN("Failed to allocate the tensor by RPC memory.");
    return nullptr;
  }
  allocated_size_map_.insert({buf, allocate_bytes});
  auto aligned_buf = reinterpret_cast<void*>(
      alignTo(alignment, reinterpret_cast<intptr_t>(buf)));
  bool status = restore_map_.insert({aligned_buf, buf}).second;
  if (!status) {
    QNN_EXECUTORCH_LOG_ERROR("Failed to allocate the tensor by RPC memory.");
    // Roll back the bookkeeping; returning aligned_buf after the free below
    // would hand out a dangling pointer.
    allocated_size_map_.erase(buf);
    rpc_mem_free_(buf);
    return nullptr;
  }
  return aligned_buf;
}
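// Worked example (illustrative addresses): if rpc_mem_alloc_ returns 0x1004
// and alignment is 64, aligned_buf becomes 0x1040; restore_map_[0x1040] then
// maps back to 0x1004 so FreeMem() can release the original allocation.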

int32_t SharedBuffer::MemToFd(void* buf) {
  int32_t memFd = -1;
  if (!initialize_) {
    QNN_EXECUTORCH_LOG_ERROR("Shared memory not initialized.");
  } else {
    memFd = rpc_mem_to_fd_(buf);
  }
  return memFd;
}
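// The returned fd is the handle rpcmem exposes for the underlying shared
// allocation; it is presumably what gets passed to QNN when registering the
// buffer for zero-copy access. -1 indicates no fd could be produced.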

void SharedBuffer::FreeMem(void* buf) {
  if (!initialize_) {
    QNN_EXECUTORCH_LOG_ERROR("Shared memory not initialized.");
  } else if (restore_map_.count(buf) == 0) {
    QNN_EXECUTORCH_LOG_WARN("Cannot free an unallocated tensor.");
  } else {
    // Free the original (unaligned) allocation and drop the bookkeeping
    // entries keyed by both pointers.
    allocated_size_map_.erase(restore_map_[buf]);
    rpc_mem_free_(restore_map_[buf]);
    restore_map_.erase(buf);
  }
}

bool SharedBuffer::IsAllocated(void* buf) {
  return restore_map_.count(buf) != 0U;
}

Error SharedBuffer::Load() {
  // On Android, the 32-bit and 64-bit libcdsprpc.so can be found under
  // /vendor/lib/ and /vendor/lib64/, respectively.
  lib_cdsp_rpc_ = dlopen("libcdsprpc.so", RTLD_NOW | RTLD_LOCAL);
  if (lib_cdsp_rpc_ == nullptr) {
    QNN_EXECUTORCH_LOG_ERROR(
        "Unable to load shared buffer. dlerror(): %s", dlerror());
    return Error::Internal;
  }
  rpc_mem_alloc_ = reinterpret_cast<RpcMemAllocFn_t>( // NOLINT
      dlsym(lib_cdsp_rpc_, "rpcmem_alloc"));
  rpc_mem_free_ = reinterpret_cast<RpcMemFreeFn_t>( // NOLINT
      dlsym(lib_cdsp_rpc_, "rpcmem_free"));
  rpc_mem_to_fd_ = reinterpret_cast<RpcMemToFdFn_t>( // NOLINT
      dlsym(lib_cdsp_rpc_, "rpcmem_to_fd"));
  if (nullptr == rpc_mem_alloc_ || nullptr == rpc_mem_free_ ||
      nullptr == rpc_mem_to_fd_) {
    QNN_EXECUTORCH_LOG_ERROR(
        "Unable to access symbols in shared buffer. dlerror(): %s", dlerror());
    dlclose(lib_cdsp_rpc_);
    return Error::Internal;
  }
  return Error::Ok;
}
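// The RpcMem*Fn_t pointer types are declared in SharedBuffer.h and are
// assumed to match the rpcmem.h prototypes, roughly:
//   void* rpcmem_alloc(int heapid, uint32_t flags, int size);
//   void rpcmem_free(void* po);
//   int rpcmem_to_fd(void* po);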

// NOTE: "Cusom" follows the spelling declared in SharedBuffer.h.
void SharedBuffer::AddCusomMemTensorAddr(void* tensor_addr, void* custom_mem) {
  tensor_addr_to_custom_mem_.insert({tensor_addr, custom_mem});
}

void SharedBuffer::AddCusomMemTensorInfo(const CustomMemTensorInfo& info) {
  custom_mem_tensor_info_set_.insert(info);
  tensor_addr_to_custom_mem_.insert({info.tensor_addr, info.custom_mem});
}

Error SharedBuffer::UnLoad() {
  if (dlclose(lib_cdsp_rpc_) != 0) {
    QNN_EXECUTORCH_LOG_ERROR(
        "Unable to close shared buffer. dlerror(): %s", dlerror());
    return Error::Internal;
  }
  return Error::Ok;
}
} // namespace qnn
} // namespace backends
} // namespace executorch