1 /*
2 * Copyright (c) Qualcomm Innovation Center, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8 #include <dlfcn.h>
9 #include <executorch/backends/qualcomm/runtime/Logging.h>
10 #include <executorch/backends/qualcomm/runtime/SharedBuffer.h>
11
12 // Refer to the QNN HTP Shared Buffer Tutorial
13 // in Qualcomm® AI Engine Direct document
// Heap id passed to rpcmem_alloc; selects the system heap per the tutorial
// cited above.
constexpr uint8_t RPCMEM_HEAP_ID_SYSTEM = 25;
// NOTE(review): flag value 1 presumably corresponds to RPCMEM_FLAG_CACHED —
// confirm against the rpcmem headers in the Hexagon SDK.
constexpr uint8_t RPCMEM_DEFAULT_FLAGS = 1;
16
operator ()(const CustomMemTensorInfo & info) const17 std::size_t std::hash<CustomMemTensorInfo>::operator()(
18 const CustomMemTensorInfo& info) const noexcept {
19 size_t hash_val = 0;
20 hash_val ^= std::hash<void*>()(info.tensor_addr);
21 hash_val ^= std::hash<void*>()(info.custom_mem);
22 hash_val ^= std::hash<size_t>()(info.pos);
23 hash_val ^= std::hash<size_t>()(info.tensor_bytes);
24 for (int i = 0; i < info.rank; ++i) {
25 hash_val ^= info.shape[i];
26 }
27 hash_val ^= std::hash<uint32_t>()(info.rank);
28 hash_val ^= std::hash<executorch::aten::ScalarType>()(info.dtype);
29 return hash_val;
30 }
31
operator ==(const CustomMemTensorInfo & lhs,const CustomMemTensorInfo & rhs)32 bool operator==(
33 const CustomMemTensorInfo& lhs,
34 const CustomMemTensorInfo& rhs) {
35 bool is_same =
36 (lhs.tensor_addr == rhs.tensor_addr && lhs.custom_mem == rhs.custom_mem &&
37 lhs.pos == rhs.pos && lhs.tensor_bytes == rhs.tensor_bytes &&
38 lhs.rank == rhs.rank && lhs.dtype == rhs.dtype);
39 for (int i = 0; i < lhs.rank; ++i) {
40 is_same &= lhs.shape[i] == rhs.shape[i];
41 }
42 return is_same;
43 }
44
45 namespace executorch {
46 namespace backends {
47 namespace qnn {
48
49 using executorch::runtime::Error;
50
51 namespace {
52
// Rounds `offset` up to the next multiple of `alignment`; returns `offset`
// unchanged when it is already aligned.
intptr_t alignTo(size_t alignment, intptr_t offset) {
  const intptr_t align = static_cast<intptr_t>(alignment);
  const intptr_t remainder = offset % align;
  if (remainder == 0) {
    return offset;
  }
  return offset + (align - remainder);
}
59
60 } // namespace
61
62 std::mutex SharedBuffer::init_mutex_;
63
GetCustomMemBase(void * buf)64 void* SharedBuffer::GetCustomMemBase(void* buf) {
65 auto it = tensor_addr_to_custom_mem_.find(buf);
66 if (it == tensor_addr_to_custom_mem_.end()) {
67 return nullptr;
68 }
69 return it->second;
70 }
71
GetUnAlignedAddr(void * buf)72 void* SharedBuffer::GetUnAlignedAddr(void* buf) {
73 auto it = restore_map_.find(buf);
74 if (it == restore_map_.end()) {
75 return nullptr;
76 }
77 return it->second;
78 }
79
GetAllocatedSize(void * buf)80 size_t SharedBuffer::GetAllocatedSize(void* buf) {
81 auto it = allocated_size_map_.find(buf);
82 if (it == allocated_size_map_.end()) {
83 return 0;
84 }
85 return it->second;
86 }
87
// Returns the process-wide SharedBuffer singleton, lazily loading the rpcmem
// library on first use. The function-local static is thread-safe to
// construct; the mutex additionally serializes Load()/SetInitialize().
SharedBuffer& SharedBuffer::GetSharedBufferManager() {
  std::lock_guard<std::mutex> lk(init_mutex_);
  static SharedBuffer shared_buffer_manager;
  if (!shared_buffer_manager.GetInitialize()) {
#if defined(__aarch64__)
    // aarch64 (on-device): resolve rpcmem symbols from libcdsprpc.so.
    Error status = shared_buffer_manager.Load();
#else
    // For x86_64 platform
    Error status = Error::Ok;
#endif
    if (status == Error::Ok) {
      shared_buffer_manager.SetInitialize(true);
    }
    // On failure initialize_ stays false, so Load() is retried on the next
    // call instead of caching the error.
  }
  return shared_buffer_manager;
}
104
~SharedBuffer()105 SharedBuffer::~SharedBuffer() {
106 #if defined(__aarch64__)
107 if (initialize_) {
108 SharedBuffer::GetSharedBufferManager().UnLoad();
109 }
110 #endif
111 };
112
AllocMem(size_t bytes,size_t alignment)113 void* SharedBuffer::AllocMem(size_t bytes, size_t alignment) {
114 if (!initialize_) {
115 QNN_EXECUTORCH_LOG_ERROR("Shared memory not initialized.");
116 return nullptr;
117 }
118 // do alignment:
119 auto allocate_bytes = static_cast<int32_t>(bytes + alignment);
120 void* buf = rpc_mem_alloc_(
121 RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, allocate_bytes);
122 if (buf == nullptr) {
123 QNN_EXECUTORCH_LOG_WARN("Failed to allocate the tensor by RPC memory.");
124 return nullptr;
125 }
126 allocated_size_map_.insert({buf, allocate_bytes});
127 auto aligned_buf = reinterpret_cast<void*>(
128 alignTo(alignment, reinterpret_cast<intptr_t>(buf)));
129 bool status = restore_map_.insert({aligned_buf, buf}).second;
130 if (!status) {
131 QNN_EXECUTORCH_LOG_ERROR("Failed to allocate the tensor by RPC memory.");
132 rpc_mem_free_(buf);
133 }
134 return aligned_buf;
135 }
136
MemToFd(void * buf)137 int32_t SharedBuffer::MemToFd(void* buf) {
138 int32_t memFd = -1;
139 if (!initialize_) {
140 QNN_EXECUTORCH_LOG_ERROR("Shared memory not initialized.");
141 } else {
142 memFd = rpc_mem_to_fd_(buf);
143 }
144 return memFd;
145 }
146
FreeMem(void * buf)147 void SharedBuffer::FreeMem(void* buf) {
148 if (!initialize_) {
149 QNN_EXECUTORCH_LOG_ERROR("Shared memory not initialized.");
150 } else if (restore_map_.count(buf) == 0) {
151 QNN_EXECUTORCH_LOG_WARN("Don't free an unallocated tensor.");
152 } else {
153 rpc_mem_free_(restore_map_[buf]);
154 restore_map_.erase(buf);
155 }
156 }
157
IsAllocated(void * buf)158 bool SharedBuffer::IsAllocated(void* buf) {
159 return restore_map_.count(buf) != 0U;
160 }
161
Load()162 Error SharedBuffer::Load() {
163 // On Android, 32-bit and 64-bit libcdsprpc.so can be found at /vendor/lib/
164 // and /vendor/lib64/ respectively.
165 lib_cdsp_rpc_ = dlopen("libcdsprpc.so", RTLD_NOW | RTLD_LOCAL);
166 if (lib_cdsp_rpc_ == nullptr) {
167 QNN_EXECUTORCH_LOG_ERROR(
168 "Unable to load shared buffer. dlerror(): %s", dlerror());
169 return Error::Internal;
170 }
171 rpc_mem_alloc_ = reinterpret_cast<RpcMemAllocFn_t>( // NOLINT
172 dlsym(lib_cdsp_rpc_, "rpcmem_alloc"));
173 rpc_mem_free_ = reinterpret_cast<RpcMemFreeFn_t>( // NOLINT
174 dlsym(lib_cdsp_rpc_, "rpcmem_free"));
175 rpc_mem_to_fd_ = reinterpret_cast<RpcMemToFdFn_t>( // NOLINT
176 dlsym(lib_cdsp_rpc_, "rpcmem_to_fd"));
177 if (nullptr == rpc_mem_alloc_ || nullptr == rpc_mem_free_ ||
178 nullptr == rpc_mem_to_fd_) {
179 QNN_EXECUTORCH_LOG_ERROR(
180 "Unable to access symbols in shared buffer. dlerror(): %s", dlerror());
181 dlclose(lib_cdsp_rpc_);
182 return Error::Internal;
183 }
184 return Error::Ok;
185 }
186
AddCusomMemTensorAddr(void * tensor_addr,void * custom_mem)187 void SharedBuffer::AddCusomMemTensorAddr(void* tensor_addr, void* custom_mem) {
188 tensor_addr_to_custom_mem_.insert({tensor_addr, custom_mem});
189 };
190
AddCusomMemTensorInfo(const CustomMemTensorInfo & info)191 void SharedBuffer::AddCusomMemTensorInfo(const CustomMemTensorInfo& info) {
192 custom_mem_tensor_info_set_.insert(info);
193 tensor_addr_to_custom_mem_.insert({info.tensor_addr, info.custom_mem});
194 }
195
UnLoad()196 Error SharedBuffer::UnLoad() {
197 if (dlclose(lib_cdsp_rpc_) != 0) {
198 QNN_EXECUTORCH_LOG_ERROR(
199 "Unable to close shared buffer. dlerror(): %s", dlerror());
200 return Error::Internal;
201 };
202 return Error::Ok;
203 }
204 } // namespace qnn
205 } // namespace backends
206 } // namespace executorch
207