/*
 * Copyright (c) 2018-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/CL/CLTensorAllocator.h"

#include "arm_compute/core/utils/misc/MMappedFile.h"
#include "arm_compute/runtime/BlobLifetimeManager.h"
#include "arm_compute/runtime/CL/CLBufferAllocator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
#include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/MemoryManagerOnDemand.h"
#include "arm_compute/runtime/PoolManager.h"
#include "tests/CL/CLAccessor.h"
#include "tests/Globals.h"
#include "tests/framework/Asserts.h"
#include "tests/framework/Macros.h"
#include "tests/validation/Validation.h"
#include "tests/validation/reference/ActivationLayer.h"

#if !defined(BARE_METAL)
#include <fstream> // std::ofstream is used by the ImportMemoryMappedFile test
#endif // !defined(BARE_METAL)
#include <memory>
#include <random>

namespace arm_compute
{
namespace test
{
namespace validation
{
namespace
{
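/** Wrap already-allocated host memory in a cl_mem via clImportMemoryARM (cl_arm_import_memory
 *  extension), letting the device use that memory without an extra copy. Asserts on failure.
 */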
cl_mem import_malloc_memory_helper(void *ptr, size_t size)
{
    const cl_import_properties_arm import_properties[] =
    {
        CL_IMPORT_TYPE_ARM,
        CL_IMPORT_TYPE_HOST_ARM,
        0
    };

    cl_int err = CL_SUCCESS;
    cl_mem buf = clImportMemoryARM(CLKernelLibrary::get().context().get(), CL_MEM_READ_WRITE, import_properties, ptr, size, &err);
    ARM_COMPUTE_ASSERT(err == CL_SUCCESS);

    return buf;
}

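/** IAllocator that forwards all requests to a CLBufferAllocator while counting the calls,
 *  so the tests can verify that the externally set global allocator is actually exercised.
 */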
class DummyAllocator final : public IAllocator
{
public:
    DummyAllocator() = default;

    void *allocate(size_t size, size_t alignment) override
    {
        ++_n_calls;
        return _backend_allocator.allocate(size, alignment);
    }
    void free(void *ptr) override
    {
        return _backend_allocator.free(ptr);
    }
    std::unique_ptr<IMemoryRegion> make_region(size_t size, size_t alignment) override
    {
        // Needs to be implemented as this is the method used internally by CLTensorAllocator
        ++_n_calls;
        return _backend_allocator.make_region(size, alignment);
    }
    int get_n_calls() const
    {
        return _n_calls;
    }

private:
    int               _n_calls{};
    CLBufferAllocator _backend_allocator{};
};

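/** Run a small NHWC convolution so that tensor and workspace allocations go through the
 *  configured allocator / memory manager. TensorShape lists the innermost dimension first,
 *  so with NHWC the shapes below read as (channels, width, height, batches).
 */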
void run_conv2d(std::shared_ptr<IMemoryManager> mm, IAllocator &mm_allocator)
{
    // Create tensors
    CLTensor src, weights, bias, dst;
    src.allocator()->init(TensorInfo(TensorShape(16U, 32U, 32U, 2U), 1, DataType::F32, DataLayout::NHWC));
    weights.allocator()->init(TensorInfo(TensorShape(16U, 3U, 3U, 32U), 1, DataType::F32, DataLayout::NHWC));
    bias.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32, DataLayout::NHWC));
    dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 32U, 2U), 1, DataType::F32, DataLayout::NHWC));

    // Create and configure function
    CLGEMMConvolutionLayer conv(mm);
    conv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1U, 1U, 1U, 1U));

    // Allocate tensors
    src.allocator()->allocate();
    weights.allocator()->allocate();
    bias.allocator()->allocate();
    dst.allocator()->allocate();

    // Finalize memory manager
    if(mm != nullptr)
    {
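        // populate() creates the requested pools and allocates their backing memory through
        // mm_allocator, so these allocations also go through the allocator under test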
        mm->populate(mm_allocator, 1 /* num_pools */);
        ARM_COMPUTE_EXPECT(mm->lifetime_manager()->are_all_finalized(), framework::LogLevel::ERRORS);
        ARM_COMPUTE_EXPECT(mm->pool_manager()->num_pools() == 1, framework::LogLevel::ERRORS);
    }

    conv.run();
}
} // namespace

TEST_SUITE(CL)
TEST_SUITE(UNIT)
TEST_SUITE(TensorAllocator)

/* Validate that an external global allocator can be used for all internal allocations */
TEST_CASE(ExternalGlobalAllocator, framework::DatasetMode::ALL)
{
    DummyAllocator global_tensor_alloc;
    CLTensorAllocator::set_global_allocator(&global_tensor_alloc);

    // Run a convolution
    run_conv2d(nullptr /* mm */, global_tensor_alloc);

    // Check that the allocator has been called multiple times (> 4)
    ARM_COMPUTE_EXPECT(global_tensor_alloc.get_n_calls() > 4, framework::LogLevel::ERRORS);

    // Nullify global allocator
    CLTensorAllocator::set_global_allocator(nullptr);
}

/* Validate that an external global allocator can be used for the pool manager */
TEST_CASE(ExternalGlobalAllocatorMemoryPool, framework::DatasetMode::ALL)
{
    auto lifetime_mgr = std::make_shared<BlobLifetimeManager>();
    auto pool_mgr     = std::make_shared<PoolManager>();
    auto mm           = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr);

    DummyAllocator global_tensor_alloc;
    CLTensorAllocator::set_global_allocator(&global_tensor_alloc);

    // Run a convolution
    run_conv2d(mm, global_tensor_alloc);

    // Check that the allocator has been called multiple times (> 4)
    ARM_COMPUTE_EXPECT(global_tensor_alloc.get_n_calls() > 4, framework::LogLevel::ERRORS);

    // Nullify global allocator
    CLTensorAllocator::set_global_allocator(nullptr);
}

/** Validates import memory interface when importing cl buffer objects */
TEST_CASE(ImportMemoryBuffer, framework::DatasetMode::ALL)
{
    // Init tensor info
    const TensorInfo info(TensorShape(24U, 16U, 3U), 1, DataType::F32);

    // Allocate memory buffer
    const size_t total_size = info.total_size();
    auto         buf        = cl::Buffer(CLScheduler::get().context(), CL_MEM_READ_WRITE, total_size);

    // Negative case : Import nullptr
    CLTensor t1;
    t1.allocator()->init(info);
    ARM_COMPUTE_ASSERT(!bool(t1.allocator()->import_memory(cl::Buffer())));
    ARM_COMPUTE_ASSERT(t1.info()->is_resizable());

    // Negative case : Import memory to a tensor that is memory managed
    CLTensor    t2;
    MemoryGroup mg;
    t2.allocator()->set_associated_memory_group(&mg);
    ARM_COMPUTE_ASSERT(!bool(t2.allocator()->import_memory(buf)));
    ARM_COMPUTE_ASSERT(t2.info()->is_resizable());

    // Negative case : Invalid buffer size
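    // (buf was sized for the 24U x 16U x 3U tensor above, so it is too small for this larger shape)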
    CLTensor         t3;
    const TensorInfo info_neg(TensorShape(32U, 16U, 3U), 1, DataType::F32);
    t3.allocator()->init(info_neg);
    ARM_COMPUTE_ASSERT(!bool(t3.allocator()->import_memory(buf)));
    ARM_COMPUTE_ASSERT(t3.info()->is_resizable());

    // Positive case : Import cl::Buffer of matching size
    CLTensor t4;
    t4.allocator()->init(info);
    ARM_COMPUTE_ASSERT(bool(t4.allocator()->import_memory(buf)));
    ARM_COMPUTE_ASSERT(!t4.info()->is_resizable());
    ARM_COMPUTE_EXPECT(t4.cl_buffer().get() == buf.get(), framework::LogLevel::ERRORS);
    t4.allocator()->free();
    ARM_COMPUTE_ASSERT(t4.info()->is_resizable());
    ARM_COMPUTE_EXPECT(t4.cl_buffer().get() != buf.get(), framework::LogLevel::ERRORS);
}

/** Validates import memory interface when importing malloced memory */
TEST_CASE(ImportMemoryMalloc, framework::DatasetMode::ALL)
{
    // Check if import extension is supported
    if(!device_supports_extension(CLKernelLibrary::get().get_device(), "cl_arm_import_memory_host"))
    {
        return;
    }
    else
    {
        const ActivationLayerInfo act_info(ActivationLayerInfo::ActivationFunction::RELU);
        const TensorShape         shape     = TensorShape(24U, 16U, 3U);
        const DataType            data_type = DataType::F32;

        // Create tensor
        const TensorInfo info(shape, 1, data_type);
        CLTensor         tensor;
        tensor.allocator()->init(info);

        // Create and configure activation function
        CLActivationLayer act_func;
        act_func.configure(&tensor, nullptr, act_info);
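        // A nullptr output makes the activation run in place, so the results are written
        // straight back into the imported host buffer checked below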

        // Allocate and import tensor
        const size_t total_size_in_elems = tensor.info()->tensor_shape().total_size();
        const size_t total_size_in_bytes = tensor.info()->total_size();
        const size_t alignment           = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
        size_t       space               = total_size_in_bytes + alignment;
        auto         raw_data            = std::make_unique<uint8_t[]>(space);

        void *aligned_ptr = raw_data.get();
        std::align(alignment, total_size_in_bytes, aligned_ptr, space);
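        // std::align bumps aligned_ptr up to the next cache-line boundary inside the
        // over-allocated buffer (space was padded by one alignment above); host pointers
        // passed to the import extension are typically expected to be cache-line aligned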

        cl::Buffer wrapped_buffer(import_malloc_memory_helper(aligned_ptr, total_size_in_bytes));
        ARM_COMPUTE_ASSERT(bool(tensor.allocator()->import_memory(wrapped_buffer)));
        ARM_COMPUTE_ASSERT(!tensor.info()->is_resizable());

        // Fill tensor
        std::uniform_real_distribution<float> distribution(-5.f, 5.f);
        std::mt19937                          gen(library->seed());
        auto                                 *typed_ptr = reinterpret_cast<float *>(aligned_ptr);
        for(unsigned int i = 0; i < total_size_in_elems; ++i)
        {
            typed_ptr[i] = distribution(gen);
        }

        // Execute function and sync
        act_func.run();
        CLScheduler::get().sync();

        // Validate result by checking that the input has no negative values
        for(unsigned int i = 0; i < total_size_in_elems; ++i)
        {
            ARM_COMPUTE_EXPECT(typed_ptr[i] >= 0, framework::LogLevel::ERRORS);
        }

        // Release resources
        tensor.allocator()->free();
        ARM_COMPUTE_EXPECT(tensor.info()->is_resizable(), framework::LogLevel::ERRORS);
    }
}

#if !defined(BARE_METAL)
/** Validates import memory interface when importing memory mapped objects */
TEST_CASE(ImportMemoryMappedFile, framework::DatasetMode::ALL)
{
    // Check if import extension is supported
    if(!device_supports_extension(CLKernelLibrary::get().get_device(), "cl_arm_import_memory_host"))
    {
        return;
    }
    else
    {
        const ActivationLayerInfo act_info(ActivationLayerInfo::ActivationFunction::RELU);
        const TensorShape         shape     = TensorShape(24U, 16U, 3U);
        const DataType            data_type = DataType::F32;

        // Create tensor
        const TensorInfo info(shape, 1, data_type);
        CLTensor         tensor;
        tensor.allocator()->init(info);

        // Create and configure activation function
        CLActivationLayer act_func;
        act_func.configure(&tensor, nullptr, act_info);
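        // As in ImportMemoryMalloc, the nullptr output makes the activation run in place on
        // the imported (memory-mapped) buffer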

        // Get number of elements
        const size_t total_size_in_elems = tensor.info()->tensor_shape().total_size();
        const size_t total_size_in_bytes = tensor.info()->total_size();

        // Create file
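        // (seek to the last byte and write a single byte so the file ends up exactly total_size_in_bytes long)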
        std::ofstream output_file("test_mmap_import.bin", std::ios::binary | std::ios::out);
        output_file.seekp(total_size_in_bytes - 1);
        output_file.write("", 1);
        output_file.close();

        // Map file
        utils::mmap_io::MMappedFile mmapped_file("test_mmap_import.bin", 0 /** Whole file */, 0);
        ARM_COMPUTE_ASSERT(mmapped_file.is_mapped());
        unsigned char *data = mmapped_file.data();

        cl::Buffer wrapped_buffer(import_malloc_memory_helper(data, total_size_in_bytes));
        ARM_COMPUTE_ASSERT(bool(tensor.allocator()->import_memory(wrapped_buffer)));
        ARM_COMPUTE_ASSERT(!tensor.info()->is_resizable());

        // Fill tensor
        std::uniform_real_distribution<float> distribution(-5.f, 5.f);
        std::mt19937                          gen(library->seed());
        auto                                 *typed_ptr = reinterpret_cast<float *>(data);
        for(unsigned int i = 0; i < total_size_in_elems; ++i)
        {
            typed_ptr[i] = distribution(gen);
        }

        // Execute function and sync
        act_func.run();
        CLScheduler::get().sync();

        // Validate result by checking that the input has no negative values
        for(unsigned int i = 0; i < total_size_in_elems; ++i)
        {
            ARM_COMPUTE_EXPECT(typed_ptr[i] >= 0, framework::LogLevel::ERRORS);
        }

        // Release resources
        tensor.allocator()->free();
        ARM_COMPUTE_ASSERT(tensor.info()->is_resizable());
    }
}
#endif // !defined(BARE_METAL)

/** Validates symmetric per channel quantization */
TEST_CASE(Symm8PerChannelQuantizationInfo, framework::DatasetMode::ALL)
{
    // Create tensor
    CLTensor                 tensor;
    const std::vector<float> scale = { 0.25f, 1.4f, 3.2f, 2.3f, 4.7f };
    const TensorInfo         info(TensorShape(32U, 16U), 1, DataType::QSYMM8_PER_CHANNEL, QuantizationInfo(scale));
    tensor.allocator()->init(info);

    // Check quantization information
    ARM_COMPUTE_EXPECT(!tensor.info()->quantization_info().empty(), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(!tensor.info()->quantization_info().scale().empty(), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(tensor.info()->quantization_info().scale().size() == scale.size(), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(tensor.info()->quantization_info().offset().empty(), framework::LogLevel::ERRORS);

    CLQuantization quantization = tensor.quantization();
    ARM_COMPUTE_ASSERT(quantization.scale != nullptr);
    ARM_COMPUTE_ASSERT(quantization.offset != nullptr);

    // Check OpenCL quantization arrays before allocating
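    // (the backing OpenCL arrays for scale/offset are only sized when the tensor is allocated,
    //  hence max_num_values() is still 0 here)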
    ARM_COMPUTE_EXPECT(quantization.scale->max_num_values() == 0, framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(quantization.offset->max_num_values() == 0, framework::LogLevel::ERRORS);

    // Check OpenCL quantization arrays after allocating
    tensor.allocator()->allocate();
    ARM_COMPUTE_EXPECT(quantization.scale->max_num_values() == scale.size(), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(quantization.offset->max_num_values() == 0, framework::LogLevel::ERRORS);

    // Validate that the scale values are the same
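    // Map the scale array for reading; enqueueMapBuffer takes the offset and size in bytes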
    auto  cl_scale_buffer = quantization.scale->cl_buffer();
    void *mapped_ptr      = CLScheduler::get().queue().enqueueMapBuffer(cl_scale_buffer, CL_TRUE, CL_MAP_READ, 0, scale.size() * sizeof(float));
    auto  cl_scale_ptr    = static_cast<float *>(mapped_ptr);
    for(unsigned int i = 0; i < scale.size(); ++i)
    {
        ARM_COMPUTE_EXPECT(cl_scale_ptr[i] == scale[i], framework::LogLevel::ERRORS);
    }
    CLScheduler::get().queue().enqueueUnmapMemObject(cl_scale_buffer, mapped_ptr);
}

TEST_SUITE_END() // TensorAllocator
TEST_SUITE_END() // UNIT
TEST_SUITE_END() // CL
} // namespace validation
} // namespace test
} // namespace arm_compute