/*
 * Copyright (c) 2018-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/CL/CLTensorAllocator.h"

#include "arm_compute/core/utils/misc/MMappedFile.h"
#include "arm_compute/runtime/BlobLifetimeManager.h"
#include "arm_compute/runtime/CL/CLBufferAllocator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
#include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/MemoryManagerOnDemand.h"
#include "arm_compute/runtime/PoolManager.h"
#include "tests/CL/CLAccessor.h"
#include "tests/Globals.h"
#include "tests/framework/Asserts.h"
#include "tests/framework/Macros.h"
#include "tests/validation/Validation.h"
#include "tests/validation/reference/ActivationLayer.h"

#include <fstream>
#include <memory>
#include <random>

namespace arm_compute
{
namespace test
{
namespace validation
{
namespace
{
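/** Wrap pre-allocated host memory in an OpenCL buffer object using the cl_arm_import_memory extension.
 *
 * Note: callers are expected to have checked that the extension is supported; the helper asserts on failure.
 */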
cl_mem import_malloc_memory_helper(void *ptr, size_t size)
{
    const cl_import_properties_arm import_properties[] =
    {
        CL_IMPORT_TYPE_ARM,
        CL_IMPORT_TYPE_HOST_ARM,
        0
    };

    cl_int err = CL_SUCCESS;
    cl_mem buf = clImportMemoryARM(CLKernelLibrary::get().context().get(), CL_MEM_READ_WRITE, import_properties, ptr, size, &err);
    ARM_COMPUTE_ASSERT(err == CL_SUCCESS);

    return buf;
}

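/** Allocator that counts how many allocation requests it receives while delegating
 *  the actual work to a CLBufferAllocator backend.
 */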
class DummyAllocator final : public IAllocator
{
public:
    DummyAllocator() = default;

    void *allocate(size_t size, size_t alignment) override
    {
        ++_n_calls;
        return _backend_allocator.allocate(size, alignment);
    }
    void free(void *ptr) override
    {
        return _backend_allocator.free(ptr);
    }
    std::unique_ptr<IMemoryRegion> make_region(size_t size, size_t alignment) override
    {
        // Needs to be overridden as this is the method used internally by CLTensorAllocator
        ++_n_calls;
        return _backend_allocator.make_region(size, alignment);
    }
    int get_n_calls() const
    {
        return _n_calls;
    }

private:
    int               _n_calls{};
    CLBufferAllocator _backend_allocator{};
};

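/** Run a small NHWC F32 convolution, optionally backed by a memory manager.
 *
 * When a memory manager is passed in, it is populated with a single pool from the
 * given allocator and its state is checked before the function is run.
 */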
void run_conv2d(std::shared_ptr<IMemoryManager> mm, IAllocator &mm_allocator)
{
    // Create tensors
    CLTensor src, weights, bias, dst;
    src.allocator()->init(TensorInfo(TensorShape(16U, 32U, 32U, 2U), 1, DataType::F32, DataLayout::NHWC));
    weights.allocator()->init(TensorInfo(TensorShape(16U, 3U, 3U, 32U), 1, DataType::F32, DataLayout::NHWC));
    bias.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32, DataLayout::NHWC));
    dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 32U, 2U), 1, DataType::F32, DataLayout::NHWC));

    // Create and configure function
    CLGEMMConvolutionLayer conv(mm);
    conv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1U, 1U, 1U, 1U));

    // Allocate tensors
    src.allocator()->allocate();
    weights.allocator()->allocate();
    bias.allocator()->allocate();
    dst.allocator()->allocate();

    // Finalize memory manager
    if(mm != nullptr)
    {
        mm->populate(mm_allocator, 1 /* num_pools */);
        ARM_COMPUTE_EXPECT(mm->lifetime_manager()->are_all_finalized(), framework::LogLevel::ERRORS);
        ARM_COMPUTE_EXPECT(mm->pool_manager()->num_pools() == 1, framework::LogLevel::ERRORS);
    }

    conv.run();
}
} // namespace

TEST_SUITE(CL)
TEST_SUITE(UNIT)
TEST_SUITE(TensorAllocator)

/** Validate that an external global allocator can be used for all internal allocations */
TEST_CASE(ExternalGlobalAllocator, framework::DatasetMode::ALL)
{
    DummyAllocator global_tensor_alloc;
    CLTensorAllocator::set_global_allocator(&global_tensor_alloc);

    // Run a convolution
    run_conv2d(nullptr /* mm */, global_tensor_alloc);

    // Check that the allocator has been called more than 4 times
    ARM_COMPUTE_EXPECT(global_tensor_alloc.get_n_calls() > 4, framework::LogLevel::ERRORS);

    // Nullify global allocator
    CLTensorAllocator::set_global_allocator(nullptr);
}

/** Validate that an external global allocator can be used for the pool manager */
TEST_CASE(ExternalGlobalAllocatorMemoryPool, framework::DatasetMode::ALL)
{
    auto lifetime_mgr = std::make_shared<BlobLifetimeManager>();
    auto pool_mgr     = std::make_shared<PoolManager>();
    auto mm           = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr);

    DummyAllocator global_tensor_alloc;
    CLTensorAllocator::set_global_allocator(&global_tensor_alloc);

    // Run a convolution
    run_conv2d(mm, global_tensor_alloc);

    // Check that the allocator has been called more than 4 times
    ARM_COMPUTE_EXPECT(global_tensor_alloc.get_n_calls() > 4, framework::LogLevel::ERRORS);

    // Nullify global allocator
    CLTensorAllocator::set_global_allocator(nullptr);
}

/** Validates the import memory interface when importing cl buffer objects */
TEST_CASE(ImportMemoryBuffer, framework::DatasetMode::ALL)
{
    // Init tensor info
    const TensorInfo info(TensorShape(24U, 16U, 3U), 1, DataType::F32);

    // Allocate memory buffer
    const size_t total_size = info.total_size();
    auto         buf        = cl::Buffer(CLScheduler::get().context(), CL_MEM_READ_WRITE, total_size);

    // Negative case : Import nullptr
    CLTensor t1;
    t1.allocator()->init(info);
    ARM_COMPUTE_ASSERT(!bool(t1.allocator()->import_memory(cl::Buffer())));
    ARM_COMPUTE_ASSERT(t1.info()->is_resizable());

    // Negative case : Import memory to a tensor that is memory managed
    CLTensor    t2;
    MemoryGroup mg;
    t2.allocator()->set_associated_memory_group(&mg);
    ARM_COMPUTE_ASSERT(!bool(t2.allocator()->import_memory(buf)));
    ARM_COMPUTE_ASSERT(t2.info()->is_resizable());

    // Negative case : Invalid buffer size
    CLTensor         t3;
    const TensorInfo info_neg(TensorShape(32U, 16U, 3U), 1, DataType::F32);
    t3.allocator()->init(info_neg);
    ARM_COMPUTE_ASSERT(!bool(t3.allocator()->import_memory(buf)));
    ARM_COMPUTE_ASSERT(t3.info()->is_resizable());

    // Positive case : Import an existing cl::Buffer
    CLTensor t4;
    t4.allocator()->init(info);
    ARM_COMPUTE_ASSERT(bool(t4.allocator()->import_memory(buf)));
    ARM_COMPUTE_ASSERT(!t4.info()->is_resizable());
    ARM_COMPUTE_EXPECT(t4.cl_buffer().get() == buf.get(), framework::LogLevel::ERRORS);
    t4.allocator()->free();
    ARM_COMPUTE_ASSERT(t4.info()->is_resizable());
    ARM_COMPUTE_EXPECT(t4.cl_buffer().get() != buf.get(), framework::LogLevel::ERRORS);
}

/** Validates the import memory interface when importing malloc-allocated memory */
TEST_CASE(ImportMemoryMalloc, framework::DatasetMode::ALL)
{
    // Check if import extension is supported
    if(!device_supports_extension(CLKernelLibrary::get().get_device(), "cl_arm_import_memory_host"))
    {
        return;
    }
    else
    {
        const ActivationLayerInfo act_info(ActivationLayerInfo::ActivationFunction::RELU);
        const TensorShape         shape     = TensorShape(24U, 16U, 3U);
        const DataType            data_type = DataType::F32;

        // Create tensor
        const TensorInfo info(shape, 1, data_type);
        CLTensor         tensor;
        tensor.allocator()->init(info);

        // Create and configure activation function
        CLActivationLayer act_func;
        act_func.configure(&tensor, nullptr, act_info);

        // Allocate and import tensor
        const size_t total_size_in_elems = tensor.info()->tensor_shape().total_size();
        const size_t total_size_in_bytes = tensor.info()->total_size();
        const size_t alignment           = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
        size_t       space               = total_size_in_bytes + alignment;
        auto         raw_data            = std::make_unique<uint8_t[]>(space);

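        // Over-allocating by one alignment step (above) lets us align the start of the
        // payload to the device's global memory cacheline size before importing it.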
        void *aligned_ptr = raw_data.get();
        std::align(alignment, total_size_in_bytes, aligned_ptr, space);

        cl::Buffer wrapped_buffer(import_malloc_memory_helper(aligned_ptr, total_size_in_bytes));
        ARM_COMPUTE_ASSERT(bool(tensor.allocator()->import_memory(wrapped_buffer)));
        ARM_COMPUTE_ASSERT(!tensor.info()->is_resizable());

        // Fill tensor
        std::uniform_real_distribution<float> distribution(-5.f, 5.f);
        std::mt19937                          gen(library->seed());
        auto                                 *typed_ptr = reinterpret_cast<float *>(aligned_ptr);
        for(unsigned int i = 0; i < total_size_in_elems; ++i)
        {
            typed_ptr[i] = distribution(gen);
        }

        // Execute function and sync
        act_func.run();
        CLScheduler::get().sync();

        // Validate result by checking that the input has no negative values
        for(unsigned int i = 0; i < total_size_in_elems; ++i)
        {
            ARM_COMPUTE_EXPECT(typed_ptr[i] >= 0, framework::LogLevel::ERRORS);
        }

        // Release resources
        tensor.allocator()->free();
        ARM_COMPUTE_EXPECT(tensor.info()->is_resizable(), framework::LogLevel::ERRORS);
    }
}

#if !defined(BARE_METAL)
/** Validates the import memory interface when importing memory-mapped files */
TEST_CASE(ImportMemoryMappedFile, framework::DatasetMode::ALL)
{
    // Check if import extension is supported
    if(!device_supports_extension(CLKernelLibrary::get().get_device(), "cl_arm_import_memory_host"))
    {
        return;
    }
    else
    {
        const ActivationLayerInfo act_info(ActivationLayerInfo::ActivationFunction::RELU);
        const TensorShape         shape     = TensorShape(24U, 16U, 3U);
        const DataType            data_type = DataType::F32;

        // Create tensor
        const TensorInfo info(shape, 1, data_type);
        CLTensor         tensor;
        tensor.allocator()->init(info);

        // Create and configure activation function
        CLActivationLayer act_func;
        act_func.configure(&tensor, nullptr, act_info);

        // Get number of elements
        const size_t total_size_in_elems = tensor.info()->tensor_shape().total_size();
        const size_t total_size_in_bytes = tensor.info()->total_size();

        // Create file
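        // Seeking to the last byte and writing a single byte extends the file to
        // exactly total_size_in_bytes, so the whole tensor can be mapped from it.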
        std::ofstream output_file("test_mmap_import.bin", std::ios::binary | std::ios::out);
        output_file.seekp(total_size_in_bytes - 1);
        output_file.write("", 1);
        output_file.close();

        // Map file
        utils::mmap_io::MMappedFile mmapped_file("test_mmap_import.bin", 0 /** Whole file */, 0);
        ARM_COMPUTE_ASSERT(mmapped_file.is_mapped());
        unsigned char *data = mmapped_file.data();

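        // Wrap the mapped region in an OpenCL buffer via the host-memory import extension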
        cl::Buffer wrapped_buffer(import_malloc_memory_helper(data, total_size_in_bytes));
        ARM_COMPUTE_ASSERT(bool(tensor.allocator()->import_memory(wrapped_buffer)));
        ARM_COMPUTE_ASSERT(!tensor.info()->is_resizable());

        // Fill tensor
        std::uniform_real_distribution<float> distribution(-5.f, 5.f);
        std::mt19937                          gen(library->seed());
        auto                                 *typed_ptr = reinterpret_cast<float *>(data);
        for(unsigned int i = 0; i < total_size_in_elems; ++i)
        {
            typed_ptr[i] = distribution(gen);
        }

        // Execute function and sync
        act_func.run();
        CLScheduler::get().sync();

        // Validate result by checking that the input has no negative values
        for(unsigned int i = 0; i < total_size_in_elems; ++i)
        {
            ARM_COMPUTE_EXPECT(typed_ptr[i] >= 0, framework::LogLevel::ERRORS);
        }

        // Release resources
        tensor.allocator()->free();
        ARM_COMPUTE_ASSERT(tensor.info()->is_resizable());
    }
}
#endif // !defined(BARE_METAL)

/** Validates symmetric per-channel quantization */
TEST_CASE(Symm8PerChannelQuantizationInfo, framework::DatasetMode::ALL)
{
    // Create tensor
    CLTensor                 tensor;
    const std::vector<float> scale = { 0.25f, 1.4f, 3.2f, 2.3f, 4.7f };
    const TensorInfo         info(TensorShape(32U, 16U), 1, DataType::QSYMM8_PER_CHANNEL, QuantizationInfo(scale));
    tensor.allocator()->init(info);

    // Check quantization information
    ARM_COMPUTE_EXPECT(!tensor.info()->quantization_info().empty(), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(!tensor.info()->quantization_info().scale().empty(), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(tensor.info()->quantization_info().scale().size() == scale.size(), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(tensor.info()->quantization_info().offset().empty(), framework::LogLevel::ERRORS);

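    // Retrieve the OpenCL arrays that back the quantization scale and offset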
    CLQuantization quantization = tensor.quantization();
    ARM_COMPUTE_ASSERT(quantization.scale != nullptr);
    ARM_COMPUTE_ASSERT(quantization.offset != nullptr);

    // Check OpenCL quantization arrays before allocating
    ARM_COMPUTE_EXPECT(quantization.scale->max_num_values() == 0, framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(quantization.offset->max_num_values() == 0, framework::LogLevel::ERRORS);

    // Check OpenCL quantization arrays after allocating
    tensor.allocator()->allocate();
    ARM_COMPUTE_EXPECT(quantization.scale->max_num_values() == scale.size(), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(quantization.offset->max_num_values() == 0, framework::LogLevel::ERRORS);

    // Validate that the scale values are the same
    auto  cl_scale_buffer = quantization.scale->cl_buffer();
    void *mapped_ptr      = CLScheduler::get().queue().enqueueMapBuffer(cl_scale_buffer, CL_TRUE, CL_MAP_READ, 0, scale.size() * sizeof(float)); // Map size is in bytes
    auto  cl_scale_ptr    = static_cast<float *>(mapped_ptr);
    for(unsigned int i = 0; i < scale.size(); ++i)
    {
        ARM_COMPUTE_EXPECT(cl_scale_ptr[i] == scale[i], framework::LogLevel::ERRORS);
    }
    CLScheduler::get().queue().enqueueUnmapMemObject(cl_scale_buffer, mapped_ptr);
}

TEST_SUITE_END() // TensorAllocator
TEST_SUITE_END() // UNIT
TEST_SUITE_END() // CL
} // namespace validation
} // namespace test
} // namespace arm_compute