1*523fa7a6SAndroid Build Coastguard Worker /*
2*523fa7a6SAndroid Build Coastguard Worker * Copyright (c) Qualcomm Innovation Center, Inc.
3*523fa7a6SAndroid Build Coastguard Worker * All rights reserved.
4*523fa7a6SAndroid Build Coastguard Worker *
5*523fa7a6SAndroid Build Coastguard Worker * This source code is licensed under the BSD-style license found in the
6*523fa7a6SAndroid Build Coastguard Worker * LICENSE file in the root directory of this source tree.
7*523fa7a6SAndroid Build Coastguard Worker */
8*523fa7a6SAndroid Build Coastguard Worker #include <dlfcn.h>
9*523fa7a6SAndroid Build Coastguard Worker #include <executorch/backends/qualcomm/runtime/Logging.h>
10*523fa7a6SAndroid Build Coastguard Worker #include <executorch/backends/qualcomm/runtime/SharedBuffer.h>
11*523fa7a6SAndroid Build Coastguard Worker
// Heap id and flags passed to the RPC memory allocator when a shared buffer is
// requested. Refer to the QNN HTP Shared Buffer Tutorial in the
// Qualcomm® AI Engine Direct document.
constexpr uint8_t RPCMEM_HEAP_ID_SYSTEM{25};
constexpr uint8_t RPCMEM_DEFAULT_FLAGS{1};
16*523fa7a6SAndroid Build Coastguard Worker
operator ()(const CustomMemTensorInfo & info) const17*523fa7a6SAndroid Build Coastguard Worker std::size_t std::hash<CustomMemTensorInfo>::operator()(
18*523fa7a6SAndroid Build Coastguard Worker const CustomMemTensorInfo& info) const noexcept {
19*523fa7a6SAndroid Build Coastguard Worker size_t hash_val = 0;
20*523fa7a6SAndroid Build Coastguard Worker hash_val ^= std::hash<void*>()(info.tensor_addr);
21*523fa7a6SAndroid Build Coastguard Worker hash_val ^= std::hash<void*>()(info.custom_mem);
22*523fa7a6SAndroid Build Coastguard Worker hash_val ^= std::hash<size_t>()(info.pos);
23*523fa7a6SAndroid Build Coastguard Worker hash_val ^= std::hash<size_t>()(info.tensor_bytes);
24*523fa7a6SAndroid Build Coastguard Worker for (int i = 0; i < info.rank; ++i) {
25*523fa7a6SAndroid Build Coastguard Worker hash_val ^= info.shape[i];
26*523fa7a6SAndroid Build Coastguard Worker }
27*523fa7a6SAndroid Build Coastguard Worker hash_val ^= std::hash<uint32_t>()(info.rank);
28*523fa7a6SAndroid Build Coastguard Worker hash_val ^= std::hash<executorch::aten::ScalarType>()(info.dtype);
29*523fa7a6SAndroid Build Coastguard Worker return hash_val;
30*523fa7a6SAndroid Build Coastguard Worker }
31*523fa7a6SAndroid Build Coastguard Worker
operator ==(const CustomMemTensorInfo & lhs,const CustomMemTensorInfo & rhs)32*523fa7a6SAndroid Build Coastguard Worker bool operator==(
33*523fa7a6SAndroid Build Coastguard Worker const CustomMemTensorInfo& lhs,
34*523fa7a6SAndroid Build Coastguard Worker const CustomMemTensorInfo& rhs) {
35*523fa7a6SAndroid Build Coastguard Worker bool is_same =
36*523fa7a6SAndroid Build Coastguard Worker (lhs.tensor_addr == rhs.tensor_addr && lhs.custom_mem == rhs.custom_mem &&
37*523fa7a6SAndroid Build Coastguard Worker lhs.pos == rhs.pos && lhs.tensor_bytes == rhs.tensor_bytes &&
38*523fa7a6SAndroid Build Coastguard Worker lhs.rank == rhs.rank && lhs.dtype == rhs.dtype);
39*523fa7a6SAndroid Build Coastguard Worker for (int i = 0; i < lhs.rank; ++i) {
40*523fa7a6SAndroid Build Coastguard Worker is_same &= lhs.shape[i] == rhs.shape[i];
41*523fa7a6SAndroid Build Coastguard Worker }
42*523fa7a6SAndroid Build Coastguard Worker return is_same;
43*523fa7a6SAndroid Build Coastguard Worker }
44*523fa7a6SAndroid Build Coastguard Worker
45*523fa7a6SAndroid Build Coastguard Worker namespace executorch {
46*523fa7a6SAndroid Build Coastguard Worker namespace backends {
47*523fa7a6SAndroid Build Coastguard Worker namespace qnn {
48*523fa7a6SAndroid Build Coastguard Worker
49*523fa7a6SAndroid Build Coastguard Worker using executorch::runtime::Error;
50*523fa7a6SAndroid Build Coastguard Worker
51*523fa7a6SAndroid Build Coastguard Worker namespace {
52*523fa7a6SAndroid Build Coastguard Worker
// Rounds `offset` up to the nearest multiple of `alignment`.
//
// @param alignment Required alignment in bytes. Values of 0 or 1 impose no
//                  constraint and `offset` is returned unchanged (this also
//                  avoids a modulo by zero).
// @param offset    Address or offset to align.
// @return The smallest multiple of `alignment` that is >= `offset`.
intptr_t alignTo(size_t alignment, intptr_t offset) {
  if (alignment <= 1) {
    return offset;
  }
  // Do all arithmetic in the signed domain so the "is aligned" check and the
  // adjustment agree with each other.
  const intptr_t step = static_cast<intptr_t>(alignment);
  const intptr_t remainder = offset % step;
  if (remainder == 0) {
    return offset;
  }
  // For negative offsets the remainder is negative; subtracting it moves
  // towards zero, which is the "round up" direction.
  return remainder > 0 ? offset + (step - remainder) : offset - remainder;
}
59*523fa7a6SAndroid Build Coastguard Worker
60*523fa7a6SAndroid Build Coastguard Worker } // namespace
61*523fa7a6SAndroid Build Coastguard Worker
62*523fa7a6SAndroid Build Coastguard Worker std::mutex SharedBuffer::init_mutex_;
63*523fa7a6SAndroid Build Coastguard Worker
GetCustomMemBase(void * buf)64*523fa7a6SAndroid Build Coastguard Worker void* SharedBuffer::GetCustomMemBase(void* buf) {
65*523fa7a6SAndroid Build Coastguard Worker auto it = tensor_addr_to_custom_mem_.find(buf);
66*523fa7a6SAndroid Build Coastguard Worker if (it == tensor_addr_to_custom_mem_.end()) {
67*523fa7a6SAndroid Build Coastguard Worker return nullptr;
68*523fa7a6SAndroid Build Coastguard Worker }
69*523fa7a6SAndroid Build Coastguard Worker return it->second;
70*523fa7a6SAndroid Build Coastguard Worker }
71*523fa7a6SAndroid Build Coastguard Worker
GetUnAlignedAddr(void * buf)72*523fa7a6SAndroid Build Coastguard Worker void* SharedBuffer::GetUnAlignedAddr(void* buf) {
73*523fa7a6SAndroid Build Coastguard Worker auto it = restore_map_.find(buf);
74*523fa7a6SAndroid Build Coastguard Worker if (it == restore_map_.end()) {
75*523fa7a6SAndroid Build Coastguard Worker return nullptr;
76*523fa7a6SAndroid Build Coastguard Worker }
77*523fa7a6SAndroid Build Coastguard Worker return it->second;
78*523fa7a6SAndroid Build Coastguard Worker }
79*523fa7a6SAndroid Build Coastguard Worker
GetAllocatedSize(void * buf)80*523fa7a6SAndroid Build Coastguard Worker size_t SharedBuffer::GetAllocatedSize(void* buf) {
81*523fa7a6SAndroid Build Coastguard Worker auto it = allocated_size_map_.find(buf);
82*523fa7a6SAndroid Build Coastguard Worker if (it == allocated_size_map_.end()) {
83*523fa7a6SAndroid Build Coastguard Worker return 0;
84*523fa7a6SAndroid Build Coastguard Worker }
85*523fa7a6SAndroid Build Coastguard Worker return it->second;
86*523fa7a6SAndroid Build Coastguard Worker }
87*523fa7a6SAndroid Build Coastguard Worker
GetSharedBufferManager()88*523fa7a6SAndroid Build Coastguard Worker SharedBuffer& SharedBuffer::GetSharedBufferManager() {
89*523fa7a6SAndroid Build Coastguard Worker std::lock_guard<std::mutex> lk(init_mutex_);
90*523fa7a6SAndroid Build Coastguard Worker static SharedBuffer shared_buffer_manager;
91*523fa7a6SAndroid Build Coastguard Worker if (!shared_buffer_manager.GetInitialize()) {
92*523fa7a6SAndroid Build Coastguard Worker #if defined(__aarch64__)
93*523fa7a6SAndroid Build Coastguard Worker Error status = shared_buffer_manager.Load();
94*523fa7a6SAndroid Build Coastguard Worker #else
95*523fa7a6SAndroid Build Coastguard Worker // For x86_64 platform
96*523fa7a6SAndroid Build Coastguard Worker Error status = Error::Ok;
97*523fa7a6SAndroid Build Coastguard Worker #endif
98*523fa7a6SAndroid Build Coastguard Worker if (status == Error::Ok) {
99*523fa7a6SAndroid Build Coastguard Worker shared_buffer_manager.SetInitialize(true);
100*523fa7a6SAndroid Build Coastguard Worker }
101*523fa7a6SAndroid Build Coastguard Worker }
102*523fa7a6SAndroid Build Coastguard Worker return shared_buffer_manager;
103*523fa7a6SAndroid Build Coastguard Worker }
104*523fa7a6SAndroid Build Coastguard Worker
~SharedBuffer()105*523fa7a6SAndroid Build Coastguard Worker SharedBuffer::~SharedBuffer() {
106*523fa7a6SAndroid Build Coastguard Worker #if defined(__aarch64__)
107*523fa7a6SAndroid Build Coastguard Worker if (initialize_) {
108*523fa7a6SAndroid Build Coastguard Worker SharedBuffer::GetSharedBufferManager().UnLoad();
109*523fa7a6SAndroid Build Coastguard Worker }
110*523fa7a6SAndroid Build Coastguard Worker #endif
111*523fa7a6SAndroid Build Coastguard Worker };
112*523fa7a6SAndroid Build Coastguard Worker
AllocMem(size_t bytes,size_t alignment)113*523fa7a6SAndroid Build Coastguard Worker void* SharedBuffer::AllocMem(size_t bytes, size_t alignment) {
114*523fa7a6SAndroid Build Coastguard Worker if (!initialize_) {
115*523fa7a6SAndroid Build Coastguard Worker QNN_EXECUTORCH_LOG_ERROR("Shared memory not initialized.");
116*523fa7a6SAndroid Build Coastguard Worker return nullptr;
117*523fa7a6SAndroid Build Coastguard Worker }
118*523fa7a6SAndroid Build Coastguard Worker // do alignment:
119*523fa7a6SAndroid Build Coastguard Worker auto allocate_bytes = static_cast<int32_t>(bytes + alignment);
120*523fa7a6SAndroid Build Coastguard Worker void* buf = rpc_mem_alloc_(
121*523fa7a6SAndroid Build Coastguard Worker RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, allocate_bytes);
122*523fa7a6SAndroid Build Coastguard Worker if (buf == nullptr) {
123*523fa7a6SAndroid Build Coastguard Worker QNN_EXECUTORCH_LOG_WARN("Failed to allocate the tensor by RPC memory.");
124*523fa7a6SAndroid Build Coastguard Worker return nullptr;
125*523fa7a6SAndroid Build Coastguard Worker }
126*523fa7a6SAndroid Build Coastguard Worker allocated_size_map_.insert({buf, allocate_bytes});
127*523fa7a6SAndroid Build Coastguard Worker auto aligned_buf = reinterpret_cast<void*>(
128*523fa7a6SAndroid Build Coastguard Worker alignTo(alignment, reinterpret_cast<intptr_t>(buf)));
129*523fa7a6SAndroid Build Coastguard Worker bool status = restore_map_.insert({aligned_buf, buf}).second;
130*523fa7a6SAndroid Build Coastguard Worker if (!status) {
131*523fa7a6SAndroid Build Coastguard Worker QNN_EXECUTORCH_LOG_ERROR("Failed to allocate the tensor by RPC memory.");
132*523fa7a6SAndroid Build Coastguard Worker rpc_mem_free_(buf);
133*523fa7a6SAndroid Build Coastguard Worker }
134*523fa7a6SAndroid Build Coastguard Worker return aligned_buf;
135*523fa7a6SAndroid Build Coastguard Worker }
136*523fa7a6SAndroid Build Coastguard Worker
MemToFd(void * buf)137*523fa7a6SAndroid Build Coastguard Worker int32_t SharedBuffer::MemToFd(void* buf) {
138*523fa7a6SAndroid Build Coastguard Worker int32_t memFd = -1;
139*523fa7a6SAndroid Build Coastguard Worker if (!initialize_) {
140*523fa7a6SAndroid Build Coastguard Worker QNN_EXECUTORCH_LOG_ERROR("Shared memory not initialized.");
141*523fa7a6SAndroid Build Coastguard Worker } else {
142*523fa7a6SAndroid Build Coastguard Worker memFd = rpc_mem_to_fd_(buf);
143*523fa7a6SAndroid Build Coastguard Worker }
144*523fa7a6SAndroid Build Coastguard Worker return memFd;
145*523fa7a6SAndroid Build Coastguard Worker }
146*523fa7a6SAndroid Build Coastguard Worker
FreeMem(void * buf)147*523fa7a6SAndroid Build Coastguard Worker void SharedBuffer::FreeMem(void* buf) {
148*523fa7a6SAndroid Build Coastguard Worker if (!initialize_) {
149*523fa7a6SAndroid Build Coastguard Worker QNN_EXECUTORCH_LOG_ERROR("Shared memory not initialized.");
150*523fa7a6SAndroid Build Coastguard Worker } else if (restore_map_.count(buf) == 0) {
151*523fa7a6SAndroid Build Coastguard Worker QNN_EXECUTORCH_LOG_WARN("Don't free an unallocated tensor.");
152*523fa7a6SAndroid Build Coastguard Worker } else {
153*523fa7a6SAndroid Build Coastguard Worker rpc_mem_free_(restore_map_[buf]);
154*523fa7a6SAndroid Build Coastguard Worker restore_map_.erase(buf);
155*523fa7a6SAndroid Build Coastguard Worker }
156*523fa7a6SAndroid Build Coastguard Worker }
157*523fa7a6SAndroid Build Coastguard Worker
IsAllocated(void * buf)158*523fa7a6SAndroid Build Coastguard Worker bool SharedBuffer::IsAllocated(void* buf) {
159*523fa7a6SAndroid Build Coastguard Worker return restore_map_.count(buf) != 0U;
160*523fa7a6SAndroid Build Coastguard Worker }
161*523fa7a6SAndroid Build Coastguard Worker
Load()162*523fa7a6SAndroid Build Coastguard Worker Error SharedBuffer::Load() {
163*523fa7a6SAndroid Build Coastguard Worker // On Android, 32-bit and 64-bit libcdsprpc.so can be found at /vendor/lib/
164*523fa7a6SAndroid Build Coastguard Worker // and /vendor/lib64/ respectively.
165*523fa7a6SAndroid Build Coastguard Worker lib_cdsp_rpc_ = dlopen("libcdsprpc.so", RTLD_NOW | RTLD_LOCAL);
166*523fa7a6SAndroid Build Coastguard Worker if (lib_cdsp_rpc_ == nullptr) {
167*523fa7a6SAndroid Build Coastguard Worker QNN_EXECUTORCH_LOG_ERROR(
168*523fa7a6SAndroid Build Coastguard Worker "Unable to load shared buffer. dlerror(): %s", dlerror());
169*523fa7a6SAndroid Build Coastguard Worker return Error::Internal;
170*523fa7a6SAndroid Build Coastguard Worker }
171*523fa7a6SAndroid Build Coastguard Worker rpc_mem_alloc_ = reinterpret_cast<RpcMemAllocFn_t>( // NOLINT
172*523fa7a6SAndroid Build Coastguard Worker dlsym(lib_cdsp_rpc_, "rpcmem_alloc"));
173*523fa7a6SAndroid Build Coastguard Worker rpc_mem_free_ = reinterpret_cast<RpcMemFreeFn_t>( // NOLINT
174*523fa7a6SAndroid Build Coastguard Worker dlsym(lib_cdsp_rpc_, "rpcmem_free"));
175*523fa7a6SAndroid Build Coastguard Worker rpc_mem_to_fd_ = reinterpret_cast<RpcMemToFdFn_t>( // NOLINT
176*523fa7a6SAndroid Build Coastguard Worker dlsym(lib_cdsp_rpc_, "rpcmem_to_fd"));
177*523fa7a6SAndroid Build Coastguard Worker if (nullptr == rpc_mem_alloc_ || nullptr == rpc_mem_free_ ||
178*523fa7a6SAndroid Build Coastguard Worker nullptr == rpc_mem_to_fd_) {
179*523fa7a6SAndroid Build Coastguard Worker QNN_EXECUTORCH_LOG_ERROR(
180*523fa7a6SAndroid Build Coastguard Worker "Unable to access symbols in shared buffer. dlerror(): %s", dlerror());
181*523fa7a6SAndroid Build Coastguard Worker dlclose(lib_cdsp_rpc_);
182*523fa7a6SAndroid Build Coastguard Worker return Error::Internal;
183*523fa7a6SAndroid Build Coastguard Worker }
184*523fa7a6SAndroid Build Coastguard Worker return Error::Ok;
185*523fa7a6SAndroid Build Coastguard Worker }
186*523fa7a6SAndroid Build Coastguard Worker
AddCusomMemTensorAddr(void * tensor_addr,void * custom_mem)187*523fa7a6SAndroid Build Coastguard Worker void SharedBuffer::AddCusomMemTensorAddr(void* tensor_addr, void* custom_mem) {
188*523fa7a6SAndroid Build Coastguard Worker tensor_addr_to_custom_mem_.insert({tensor_addr, custom_mem});
189*523fa7a6SAndroid Build Coastguard Worker };
190*523fa7a6SAndroid Build Coastguard Worker
AddCusomMemTensorInfo(const CustomMemTensorInfo & info)191*523fa7a6SAndroid Build Coastguard Worker void SharedBuffer::AddCusomMemTensorInfo(const CustomMemTensorInfo& info) {
192*523fa7a6SAndroid Build Coastguard Worker custom_mem_tensor_info_set_.insert(info);
193*523fa7a6SAndroid Build Coastguard Worker tensor_addr_to_custom_mem_.insert({info.tensor_addr, info.custom_mem});
194*523fa7a6SAndroid Build Coastguard Worker }
195*523fa7a6SAndroid Build Coastguard Worker
UnLoad()196*523fa7a6SAndroid Build Coastguard Worker Error SharedBuffer::UnLoad() {
197*523fa7a6SAndroid Build Coastguard Worker if (dlclose(lib_cdsp_rpc_) != 0) {
198*523fa7a6SAndroid Build Coastguard Worker QNN_EXECUTORCH_LOG_ERROR(
199*523fa7a6SAndroid Build Coastguard Worker "Unable to close shared buffer. dlerror(): %s", dlerror());
200*523fa7a6SAndroid Build Coastguard Worker return Error::Internal;
201*523fa7a6SAndroid Build Coastguard Worker };
202*523fa7a6SAndroid Build Coastguard Worker return Error::Ok;
203*523fa7a6SAndroid Build Coastguard Worker }
204*523fa7a6SAndroid Build Coastguard Worker } // namespace qnn
205*523fa7a6SAndroid Build Coastguard Worker } // namespace backends
206*523fa7a6SAndroid Build Coastguard Worker } // namespace executorch
207