xref: /aosp_15_r20/external/executorch/backends/qualcomm/runtime/SharedBuffer.cpp (revision 523fa7a60841cd1ecfb9cc4201f1ca8b03ed023a)
1*523fa7a6SAndroid Build Coastguard Worker /*
2*523fa7a6SAndroid Build Coastguard Worker  * Copyright (c) Qualcomm Innovation Center, Inc.
3*523fa7a6SAndroid Build Coastguard Worker  * All rights reserved.
4*523fa7a6SAndroid Build Coastguard Worker  *
5*523fa7a6SAndroid Build Coastguard Worker  * This source code is licensed under the BSD-style license found in the
6*523fa7a6SAndroid Build Coastguard Worker  * LICENSE file in the root directory of this source tree.
7*523fa7a6SAndroid Build Coastguard Worker  */
8*523fa7a6SAndroid Build Coastguard Worker #include <dlfcn.h>
9*523fa7a6SAndroid Build Coastguard Worker #include <executorch/backends/qualcomm/runtime/Logging.h>
10*523fa7a6SAndroid Build Coastguard Worker #include <executorch/backends/qualcomm/runtime/SharedBuffer.h>
11*523fa7a6SAndroid Build Coastguard Worker 
12*523fa7a6SAndroid Build Coastguard Worker // Refer to the QNN HTP Shared Buffer Tutorial
13*523fa7a6SAndroid Build Coastguard Worker // in Qualcomm® AI Engine Direct document
14*523fa7a6SAndroid Build Coastguard Worker constexpr uint8_t RPCMEM_HEAP_ID_SYSTEM = 25;
15*523fa7a6SAndroid Build Coastguard Worker constexpr uint8_t RPCMEM_DEFAULT_FLAGS = 1;
16*523fa7a6SAndroid Build Coastguard Worker 
operator ()(const CustomMemTensorInfo & info) const17*523fa7a6SAndroid Build Coastguard Worker std::size_t std::hash<CustomMemTensorInfo>::operator()(
18*523fa7a6SAndroid Build Coastguard Worker     const CustomMemTensorInfo& info) const noexcept {
19*523fa7a6SAndroid Build Coastguard Worker   size_t hash_val = 0;
20*523fa7a6SAndroid Build Coastguard Worker   hash_val ^= std::hash<void*>()(info.tensor_addr);
21*523fa7a6SAndroid Build Coastguard Worker   hash_val ^= std::hash<void*>()(info.custom_mem);
22*523fa7a6SAndroid Build Coastguard Worker   hash_val ^= std::hash<size_t>()(info.pos);
23*523fa7a6SAndroid Build Coastguard Worker   hash_val ^= std::hash<size_t>()(info.tensor_bytes);
24*523fa7a6SAndroid Build Coastguard Worker   for (int i = 0; i < info.rank; ++i) {
25*523fa7a6SAndroid Build Coastguard Worker     hash_val ^= info.shape[i];
26*523fa7a6SAndroid Build Coastguard Worker   }
27*523fa7a6SAndroid Build Coastguard Worker   hash_val ^= std::hash<uint32_t>()(info.rank);
28*523fa7a6SAndroid Build Coastguard Worker   hash_val ^= std::hash<executorch::aten::ScalarType>()(info.dtype);
29*523fa7a6SAndroid Build Coastguard Worker   return hash_val;
30*523fa7a6SAndroid Build Coastguard Worker }
31*523fa7a6SAndroid Build Coastguard Worker 
operator ==(const CustomMemTensorInfo & lhs,const CustomMemTensorInfo & rhs)32*523fa7a6SAndroid Build Coastguard Worker bool operator==(
33*523fa7a6SAndroid Build Coastguard Worker     const CustomMemTensorInfo& lhs,
34*523fa7a6SAndroid Build Coastguard Worker     const CustomMemTensorInfo& rhs) {
35*523fa7a6SAndroid Build Coastguard Worker   bool is_same =
36*523fa7a6SAndroid Build Coastguard Worker       (lhs.tensor_addr == rhs.tensor_addr && lhs.custom_mem == rhs.custom_mem &&
37*523fa7a6SAndroid Build Coastguard Worker        lhs.pos == rhs.pos && lhs.tensor_bytes == rhs.tensor_bytes &&
38*523fa7a6SAndroid Build Coastguard Worker        lhs.rank == rhs.rank && lhs.dtype == rhs.dtype);
39*523fa7a6SAndroid Build Coastguard Worker   for (int i = 0; i < lhs.rank; ++i) {
40*523fa7a6SAndroid Build Coastguard Worker     is_same &= lhs.shape[i] == rhs.shape[i];
41*523fa7a6SAndroid Build Coastguard Worker   }
42*523fa7a6SAndroid Build Coastguard Worker   return is_same;
43*523fa7a6SAndroid Build Coastguard Worker }
44*523fa7a6SAndroid Build Coastguard Worker 
45*523fa7a6SAndroid Build Coastguard Worker namespace executorch {
46*523fa7a6SAndroid Build Coastguard Worker namespace backends {
47*523fa7a6SAndroid Build Coastguard Worker namespace qnn {
48*523fa7a6SAndroid Build Coastguard Worker 
49*523fa7a6SAndroid Build Coastguard Worker using executorch::runtime::Error;
50*523fa7a6SAndroid Build Coastguard Worker 
51*523fa7a6SAndroid Build Coastguard Worker namespace {
52*523fa7a6SAndroid Build Coastguard Worker 
// Round `offset` up to the next multiple of `alignment`; returns `offset`
// unchanged when it is already aligned.
intptr_t alignTo(size_t alignment, intptr_t offset) {
  const auto align = static_cast<intptr_t>(alignment);
  const intptr_t remainder = offset % align;
  if (remainder == 0) {
    return offset;
  }
  return offset + (align - remainder);
}
59*523fa7a6SAndroid Build Coastguard Worker 
60*523fa7a6SAndroid Build Coastguard Worker } // namespace
61*523fa7a6SAndroid Build Coastguard Worker 
62*523fa7a6SAndroid Build Coastguard Worker std::mutex SharedBuffer::init_mutex_;
63*523fa7a6SAndroid Build Coastguard Worker 
GetCustomMemBase(void * buf)64*523fa7a6SAndroid Build Coastguard Worker void* SharedBuffer::GetCustomMemBase(void* buf) {
65*523fa7a6SAndroid Build Coastguard Worker   auto it = tensor_addr_to_custom_mem_.find(buf);
66*523fa7a6SAndroid Build Coastguard Worker   if (it == tensor_addr_to_custom_mem_.end()) {
67*523fa7a6SAndroid Build Coastguard Worker     return nullptr;
68*523fa7a6SAndroid Build Coastguard Worker   }
69*523fa7a6SAndroid Build Coastguard Worker   return it->second;
70*523fa7a6SAndroid Build Coastguard Worker }
71*523fa7a6SAndroid Build Coastguard Worker 
GetUnAlignedAddr(void * buf)72*523fa7a6SAndroid Build Coastguard Worker void* SharedBuffer::GetUnAlignedAddr(void* buf) {
73*523fa7a6SAndroid Build Coastguard Worker   auto it = restore_map_.find(buf);
74*523fa7a6SAndroid Build Coastguard Worker   if (it == restore_map_.end()) {
75*523fa7a6SAndroid Build Coastguard Worker     return nullptr;
76*523fa7a6SAndroid Build Coastguard Worker   }
77*523fa7a6SAndroid Build Coastguard Worker   return it->second;
78*523fa7a6SAndroid Build Coastguard Worker }
79*523fa7a6SAndroid Build Coastguard Worker 
GetAllocatedSize(void * buf)80*523fa7a6SAndroid Build Coastguard Worker size_t SharedBuffer::GetAllocatedSize(void* buf) {
81*523fa7a6SAndroid Build Coastguard Worker   auto it = allocated_size_map_.find(buf);
82*523fa7a6SAndroid Build Coastguard Worker   if (it == allocated_size_map_.end()) {
83*523fa7a6SAndroid Build Coastguard Worker     return 0;
84*523fa7a6SAndroid Build Coastguard Worker   }
85*523fa7a6SAndroid Build Coastguard Worker   return it->second;
86*523fa7a6SAndroid Build Coastguard Worker }
87*523fa7a6SAndroid Build Coastguard Worker 
// Process-wide accessor for the shared-buffer manager singleton.
// Lazily loads the rpcmem library on first use (aarch64 only) and retries
// on subsequent calls if that load failed.
SharedBuffer& SharedBuffer::GetSharedBufferManager() {
  // The mutex serializes the lazy Load()/SetInitialize() below; the
  // function-local static's own construction is already thread-safe.
  std::lock_guard<std::mutex> lk(init_mutex_);
  static SharedBuffer shared_buffer_manager;
  if (!shared_buffer_manager.GetInitialize()) {
#if defined(__aarch64__)
    // On-device: resolve rpcmem_* entry points from libcdsprpc.so.
    Error status = shared_buffer_manager.Load();
#else
    // For x86_64 platform
    Error status = Error::Ok;
#endif
    if (status == Error::Ok) {
      shared_buffer_manager.SetInitialize(true);
    }
    // On failure the flag stays false, so the next call retries Load().
  }
  return shared_buffer_manager;
}
104*523fa7a6SAndroid Build Coastguard Worker 
~SharedBuffer()105*523fa7a6SAndroid Build Coastguard Worker SharedBuffer::~SharedBuffer() {
106*523fa7a6SAndroid Build Coastguard Worker #if defined(__aarch64__)
107*523fa7a6SAndroid Build Coastguard Worker   if (initialize_) {
108*523fa7a6SAndroid Build Coastguard Worker     SharedBuffer::GetSharedBufferManager().UnLoad();
109*523fa7a6SAndroid Build Coastguard Worker   }
110*523fa7a6SAndroid Build Coastguard Worker #endif
111*523fa7a6SAndroid Build Coastguard Worker };
112*523fa7a6SAndroid Build Coastguard Worker 
AllocMem(size_t bytes,size_t alignment)113*523fa7a6SAndroid Build Coastguard Worker void* SharedBuffer::AllocMem(size_t bytes, size_t alignment) {
114*523fa7a6SAndroid Build Coastguard Worker   if (!initialize_) {
115*523fa7a6SAndroid Build Coastguard Worker     QNN_EXECUTORCH_LOG_ERROR("Shared memory not initialized.");
116*523fa7a6SAndroid Build Coastguard Worker     return nullptr;
117*523fa7a6SAndroid Build Coastguard Worker   }
118*523fa7a6SAndroid Build Coastguard Worker   // do alignment:
119*523fa7a6SAndroid Build Coastguard Worker   auto allocate_bytes = static_cast<int32_t>(bytes + alignment);
120*523fa7a6SAndroid Build Coastguard Worker   void* buf = rpc_mem_alloc_(
121*523fa7a6SAndroid Build Coastguard Worker       RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, allocate_bytes);
122*523fa7a6SAndroid Build Coastguard Worker   if (buf == nullptr) {
123*523fa7a6SAndroid Build Coastguard Worker     QNN_EXECUTORCH_LOG_WARN("Failed to allocate the tensor by RPC memory.");
124*523fa7a6SAndroid Build Coastguard Worker     return nullptr;
125*523fa7a6SAndroid Build Coastguard Worker   }
126*523fa7a6SAndroid Build Coastguard Worker   allocated_size_map_.insert({buf, allocate_bytes});
127*523fa7a6SAndroid Build Coastguard Worker   auto aligned_buf = reinterpret_cast<void*>(
128*523fa7a6SAndroid Build Coastguard Worker       alignTo(alignment, reinterpret_cast<intptr_t>(buf)));
129*523fa7a6SAndroid Build Coastguard Worker   bool status = restore_map_.insert({aligned_buf, buf}).second;
130*523fa7a6SAndroid Build Coastguard Worker   if (!status) {
131*523fa7a6SAndroid Build Coastguard Worker     QNN_EXECUTORCH_LOG_ERROR("Failed to allocate the tensor by RPC memory.");
132*523fa7a6SAndroid Build Coastguard Worker     rpc_mem_free_(buf);
133*523fa7a6SAndroid Build Coastguard Worker   }
134*523fa7a6SAndroid Build Coastguard Worker   return aligned_buf;
135*523fa7a6SAndroid Build Coastguard Worker }
136*523fa7a6SAndroid Build Coastguard Worker 
MemToFd(void * buf)137*523fa7a6SAndroid Build Coastguard Worker int32_t SharedBuffer::MemToFd(void* buf) {
138*523fa7a6SAndroid Build Coastguard Worker   int32_t memFd = -1;
139*523fa7a6SAndroid Build Coastguard Worker   if (!initialize_) {
140*523fa7a6SAndroid Build Coastguard Worker     QNN_EXECUTORCH_LOG_ERROR("Shared memory not initialized.");
141*523fa7a6SAndroid Build Coastguard Worker   } else {
142*523fa7a6SAndroid Build Coastguard Worker     memFd = rpc_mem_to_fd_(buf);
143*523fa7a6SAndroid Build Coastguard Worker   }
144*523fa7a6SAndroid Build Coastguard Worker   return memFd;
145*523fa7a6SAndroid Build Coastguard Worker }
146*523fa7a6SAndroid Build Coastguard Worker 
FreeMem(void * buf)147*523fa7a6SAndroid Build Coastguard Worker void SharedBuffer::FreeMem(void* buf) {
148*523fa7a6SAndroid Build Coastguard Worker   if (!initialize_) {
149*523fa7a6SAndroid Build Coastguard Worker     QNN_EXECUTORCH_LOG_ERROR("Shared memory not initialized.");
150*523fa7a6SAndroid Build Coastguard Worker   } else if (restore_map_.count(buf) == 0) {
151*523fa7a6SAndroid Build Coastguard Worker     QNN_EXECUTORCH_LOG_WARN("Don't free an unallocated tensor.");
152*523fa7a6SAndroid Build Coastguard Worker   } else {
153*523fa7a6SAndroid Build Coastguard Worker     rpc_mem_free_(restore_map_[buf]);
154*523fa7a6SAndroid Build Coastguard Worker     restore_map_.erase(buf);
155*523fa7a6SAndroid Build Coastguard Worker   }
156*523fa7a6SAndroid Build Coastguard Worker }
157*523fa7a6SAndroid Build Coastguard Worker 
IsAllocated(void * buf)158*523fa7a6SAndroid Build Coastguard Worker bool SharedBuffer::IsAllocated(void* buf) {
159*523fa7a6SAndroid Build Coastguard Worker   return restore_map_.count(buf) != 0U;
160*523fa7a6SAndroid Build Coastguard Worker }
161*523fa7a6SAndroid Build Coastguard Worker 
Load()162*523fa7a6SAndroid Build Coastguard Worker Error SharedBuffer::Load() {
163*523fa7a6SAndroid Build Coastguard Worker   // On Android, 32-bit and 64-bit libcdsprpc.so can be found at /vendor/lib/
164*523fa7a6SAndroid Build Coastguard Worker   // and /vendor/lib64/ respectively.
165*523fa7a6SAndroid Build Coastguard Worker   lib_cdsp_rpc_ = dlopen("libcdsprpc.so", RTLD_NOW | RTLD_LOCAL);
166*523fa7a6SAndroid Build Coastguard Worker   if (lib_cdsp_rpc_ == nullptr) {
167*523fa7a6SAndroid Build Coastguard Worker     QNN_EXECUTORCH_LOG_ERROR(
168*523fa7a6SAndroid Build Coastguard Worker         "Unable to load shared buffer. dlerror(): %s", dlerror());
169*523fa7a6SAndroid Build Coastguard Worker     return Error::Internal;
170*523fa7a6SAndroid Build Coastguard Worker   }
171*523fa7a6SAndroid Build Coastguard Worker   rpc_mem_alloc_ = reinterpret_cast<RpcMemAllocFn_t>( // NOLINT
172*523fa7a6SAndroid Build Coastguard Worker       dlsym(lib_cdsp_rpc_, "rpcmem_alloc"));
173*523fa7a6SAndroid Build Coastguard Worker   rpc_mem_free_ = reinterpret_cast<RpcMemFreeFn_t>( // NOLINT
174*523fa7a6SAndroid Build Coastguard Worker       dlsym(lib_cdsp_rpc_, "rpcmem_free"));
175*523fa7a6SAndroid Build Coastguard Worker   rpc_mem_to_fd_ = reinterpret_cast<RpcMemToFdFn_t>( // NOLINT
176*523fa7a6SAndroid Build Coastguard Worker       dlsym(lib_cdsp_rpc_, "rpcmem_to_fd"));
177*523fa7a6SAndroid Build Coastguard Worker   if (nullptr == rpc_mem_alloc_ || nullptr == rpc_mem_free_ ||
178*523fa7a6SAndroid Build Coastguard Worker       nullptr == rpc_mem_to_fd_) {
179*523fa7a6SAndroid Build Coastguard Worker     QNN_EXECUTORCH_LOG_ERROR(
180*523fa7a6SAndroid Build Coastguard Worker         "Unable to access symbols in shared buffer. dlerror(): %s", dlerror());
181*523fa7a6SAndroid Build Coastguard Worker     dlclose(lib_cdsp_rpc_);
182*523fa7a6SAndroid Build Coastguard Worker     return Error::Internal;
183*523fa7a6SAndroid Build Coastguard Worker   }
184*523fa7a6SAndroid Build Coastguard Worker   return Error::Ok;
185*523fa7a6SAndroid Build Coastguard Worker }
186*523fa7a6SAndroid Build Coastguard Worker 
AddCusomMemTensorAddr(void * tensor_addr,void * custom_mem)187*523fa7a6SAndroid Build Coastguard Worker void SharedBuffer::AddCusomMemTensorAddr(void* tensor_addr, void* custom_mem) {
188*523fa7a6SAndroid Build Coastguard Worker   tensor_addr_to_custom_mem_.insert({tensor_addr, custom_mem});
189*523fa7a6SAndroid Build Coastguard Worker };
190*523fa7a6SAndroid Build Coastguard Worker 
AddCusomMemTensorInfo(const CustomMemTensorInfo & info)191*523fa7a6SAndroid Build Coastguard Worker void SharedBuffer::AddCusomMemTensorInfo(const CustomMemTensorInfo& info) {
192*523fa7a6SAndroid Build Coastguard Worker   custom_mem_tensor_info_set_.insert(info);
193*523fa7a6SAndroid Build Coastguard Worker   tensor_addr_to_custom_mem_.insert({info.tensor_addr, info.custom_mem});
194*523fa7a6SAndroid Build Coastguard Worker }
195*523fa7a6SAndroid Build Coastguard Worker 
UnLoad()196*523fa7a6SAndroid Build Coastguard Worker Error SharedBuffer::UnLoad() {
197*523fa7a6SAndroid Build Coastguard Worker   if (dlclose(lib_cdsp_rpc_) != 0) {
198*523fa7a6SAndroid Build Coastguard Worker     QNN_EXECUTORCH_LOG_ERROR(
199*523fa7a6SAndroid Build Coastguard Worker         "Unable to close shared buffer. dlerror(): %s", dlerror());
200*523fa7a6SAndroid Build Coastguard Worker     return Error::Internal;
201*523fa7a6SAndroid Build Coastguard Worker   };
202*523fa7a6SAndroid Build Coastguard Worker   return Error::Ok;
203*523fa7a6SAndroid Build Coastguard Worker }
204*523fa7a6SAndroid Build Coastguard Worker } // namespace qnn
205*523fa7a6SAndroid Build Coastguard Worker } // namespace backends
206*523fa7a6SAndroid Build Coastguard Worker } // namespace executorch
207