xref: /aosp_15_r20/external/ComputeLibrary/tests/validation/NEON/GEMM.cpp (revision c217d954acce2dbc11938adb493fc0abd69584f3)
/*
 * Copyright (c) 2017-2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24 #include "arm_compute/core/Types.h"
25 #include "arm_compute/runtime/NEON/functions/NEGEMM.h"
26 #include "arm_compute/runtime/Tensor.h"
27 #include "arm_compute/runtime/TensorAllocator.h"
28 #include "src/core/helpers/MemoryHelpers.h"
29 #include "src/cpu/kernels/CpuGemmInterleave4x4Kernel.h"
30 #include "src/cpu/kernels/CpuGemmMatrixMultiplyKernel.h"
31 #include "src/cpu/kernels/CpuGemmTranspose1xWKernel.h"
32 #include "src/cpu/operators/CpuGemm.h"
33 #include "tests/NEON/Accessor.h"
34 #include "tests/NEON/Helper.h"
35 #include "tests/PaddingCalculator.h"
36 #include "tests/datasets/LargeGEMMDataset.h"
37 #include "tests/datasets/SmallGEMMDataset.h"
38 #include "tests/datasets/TinyGEMMDataset.h"
39 #include "tests/framework/Asserts.h"
40 #include "tests/framework/Macros.h"
41 #include "tests/framework/datasets/Datasets.h"
42 #include "tests/validation/Validation.h"
43 #include "tests/validation/fixtures/GEMMFixture.h"
44 #include "tests/validation/fixtures/GEMMInterleave4x4Fixture.h"
45 #include "tests/validation/fixtures/GEMMTranspose1xWFixture.h"
46 
47 namespace arm_compute
48 {
49 namespace test
50 {
51 namespace validation
52 {
53 namespace
54 {
55 constexpr AbsoluteTolerance<float> tolerance_f(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for FP32 data types */
56 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
57 RelativeTolerance<half_float::half> rel_tolerance_f16(half(0.2)); /**< Relative tolerance value for comparing reference's output against implementation's output for FP16 data types */
58 const AbsoluteTolerance<float>      abs_tolerance_f16(0.2f);      /**< Absolute tolerance value for comparing reference's output against implementation's output for FP16 data types */
59 constexpr float                     tolerance_num = 0.07f;        /**< Tolerance number for FP16 data types */
60 #endif                                                            /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
61 /** CNN data types */
62 const auto CNNDataTypes = framework::dataset::make("DataType",
63 {
64 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
65     DataType::F16,
66 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
67     DataType::F32,
68 });
69 
70 const auto data_interleave = framework::dataset::make("M", 8, 12) * framework::dataset::make("N", 8, 12);
71 const auto data_transpose  = framework::dataset::make("M", 8, 14) * framework::dataset::make("N", 7, 14);
72 
73 /** Zero padding test */
74 template <typename FunctionType>
validate_zero_padding(unsigned int dim0_value,unsigned int dim1_value)75 bool validate_zero_padding(unsigned int dim0_value, unsigned int dim1_value)
76 {
77     const TensorShape in_shape(dim0_value, dim1_value);
78     TensorInfo        in(in_shape, 1, DataType::U32);
79     TensorInfo        dst;
80 
81     ARM_COMPUTE_EXPECT(in.is_resizable(), framework::LogLevel::ERRORS);
82 
83     // Validate zero-padding
84     FunctionType func;
85 
86     func.configure(&in, &dst);
87 
88     return in.padding().empty();
89 }
90 
91 /* Zero padding test for GEMM kernels */
validate_gemm_zero_padding(const TensorShape shape0,const TensorShape shape1)92 bool validate_gemm_zero_padding(const TensorShape shape0, const TensorShape shape1)
93 {
94     // Create tensors
95     TensorInfo in0(shape0, 1, DataType::F32);
96     TensorInfo in1(shape1, 1, DataType::F32);
97     TensorInfo dst;
98 
99     // Validate zero-padding
100     cpu::kernels::CpuGemmMatrixMultiplyKernel gemm;
101     gemm.configure(&in0, &in1, &dst, 1.0, false);
102 
103     return in0.padding().empty() && in1.padding().empty() && dst.padding().empty();
104 }
105 } // namespace
106 
107 TEST_SUITE(NEON)
TEST_SUITE(GEMM)108 TEST_SUITE(GEMM)
109 
110 /** Test case for memory injection in @ref cpu::CpuGemm.
111  *
112  * Configure the operator once and inject memory at run-time in multiple executions.
113  *
114  * Checks performed in order:
115  * - Both runs compute the same output
116  */
117 TEST_CASE(MemoryInjection, framework::DatasetMode::ALL)
118 {
119     auto       gemm      = std::make_unique<cpu::CpuGemm>();
120     const auto lhs_info  = TensorInfo(TensorShape(3U, 3U), 1, DataType::F32);
121     const auto rhs_info  = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32);
122     const auto c_info    = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32);
123     auto       dst_info  = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32);
124     const auto gemm_info = GEMMInfo{};
125     gemm->configure(&lhs_info, &rhs_info, &c_info, &dst_info, 1.f, 1.f, gemm_info);
126 
127     // telhs are newly created every call of this lambda function
128     auto lhs = create_tensor<Tensor>(lhs_info);
129     auto rhs = create_tensor<Tensor>(rhs_info);
130     auto c   = create_tensor<Tensor>(c_info);
131     lhs.allocator()->allocate();
132     rhs.allocator()->allocate();
133     c.allocator()->allocate();
134 
135     ITensorPack run_pack{ { TensorType::ACL_SRC_0, &lhs }, { TensorType::ACL_SRC_1, &rhs }, { TensorType::ACL_SRC_2, &c } };
136     ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &rhs }, { TensorType::ACL_SRC_2, &c } };
137 
138     auto mg = MemoryGroup{};
139     auto ws = manage_workspace<Tensor>(gemm->workspace(), mg, run_pack, prep_pack);
140 
141     auto run_conv = [&]() -> Tensor
142     {
143         auto dst = create_tensor<Tensor>(dst_info);
144         dst.allocator()->allocate();
145         run_pack.add_tensor(TensorType::ACL_DST, &dst);
146 
147         library->fill_tensor_value(Accessor(lhs), 1.f);
148         library->fill_tensor_value(Accessor(rhs), 2.f);
149         library->fill_tensor_value(Accessor(c), 3.f);
150         // This operator is configured once and captured by this lambda.
151         gemm->prepare(prep_pack);
152         gemm->run(run_pack);
153         return dst;
154     };
155     auto result_0 = run_conv();
156     auto result_1 = run_conv();
157     for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
158     {
159         ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
160     }
161 }
162 
163 /** Test case for memory injection in @ref NEGEMM.
164  *
165  * Make sure @ref NEGEMM still works through injecting the memory at configure time using the old API.
166  *
167  * Checks performed in order:
168  * - Both runs compute the same output
169  */
TEST_CASE(MultipleExecutionWithConfigure,framework::DatasetMode::ALL)170 TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL)
171 {
172     auto       gemm      = std::make_unique<NEGEMM>();
173     const auto lhs_info  = TensorInfo(TensorShape(3U, 3U), 1, DataType::F32);
174     const auto rhs_info  = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32);
175     const auto c_info    = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32);
176     auto       dst_info  = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32);
177     const auto gemm_info = GEMMInfo{};
178     auto       run_conv  = [&]()
179     {
180         auto lhs = create_tensor<Tensor>(lhs_info);
181         auto rhs = create_tensor<Tensor>(rhs_info);
182         auto c   = create_tensor<Tensor>(c_info);
183         auto dst = create_tensor<Tensor>(dst_info);
184         gemm->configure(&lhs, &rhs, &c, &dst, 1.f, 1.f, gemm_info);
185         lhs.allocator()->allocate();
186         rhs.allocator()->allocate();
187         c.allocator()->allocate();
188         dst.allocator()->allocate();
189         library->fill_tensor_value(Accessor(lhs), 1.f);
190         library->fill_tensor_value(Accessor(rhs), 2.f);
191         library->fill_tensor_value(Accessor(c), 3.f);
192         gemm->run();
193         return dst;
194     };
195     auto result_0 = run_conv();
196     auto result_1 = run_conv();
197     for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
198     {
199         ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
200     }
201 }
202 
203 // *INDENT-OFF*
204 // clang-format off
205 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
206                framework::dataset::make("LhsInfo", { TensorInfo(TensorShape(27U, 13U), 1, DataType::S32), // Unsupported data type
207                                                        TensorInfo(TensorShape(27U, 13U), 1, DataType::F32),
208                                                      }),
209                framework::dataset::make("RhsInfo",{ TensorInfo(TensorShape(8U, 27U), 1, DataType::S32),
210                                                         TensorInfo(TensorShape(8U, 27U), 1, DataType::F32),
211                                                      })),
212                framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(8U, 13U), 1, DataType::S32),
213                                                         TensorInfo(TensorShape(8U, 13U), 1, DataType::F32),
214                                                      })),
215                framework::dataset::make("Expected", { false, true })),
216                lhs_info, rhs_info, output_info, expected)
217 {
218     constexpr float alpha = 1.0;
219     constexpr float beta = 0.0;
220     const auto gemm_info = GEMMInfo();
221     bool is_valid = bool(NEGEMM::validate(&lhs_info.clone()->set_is_resizable(true), &rhs_info.clone()->set_is_resizable(true), nullptr, &output_info.clone()->set_is_resizable(true), alpha, beta, gemm_info));
222     ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
223 }
224 // clang-format on
225 // *INDENT-ON*
226 TEST_SUITE(KERNEL_SELECTION)
227 DATA_TEST_CASE(KernelSelection_mul_and_add, framework::DatasetMode::ALL,
228                combine(framework::dataset::make("CpuExt", std::string("NEON")),
229                        framework::dataset::make("DataType", { DataType::F32,
230                                                               DataType::F16
231                                                             })),
232                cpu_ext, data_type)
233 {
234     using namespace cpu::kernels;
235 
236     cpuinfo::CpuIsaInfo cpu_isa{};
237     cpu_isa.neon = (cpu_ext == "NEON");
238     cpu_isa.fp16 = (data_type == DataType::F16);
239 
240     const auto *selected_impl_mul = CpuGemmMatrixMultiplyKernel::get_implementation(DataTypeISASelectorData{ data_type, cpu_isa }, cpu::KernelSelectionType::Preferred);
241 
242     ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl_mul);
243 
244     std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_gemm_matrix_mul";
245     std::string actual   = selected_impl_mul->name;
246 
247     ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS);
248 
249     const auto *selected_impl_add = CpuGemmMatrixAdditionKernel::get_implementation(DataTypeISASelectorData{ data_type, cpu_isa }, cpu::KernelSelectionType::Preferred);
250 
251     ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl_add);
252 
253     expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_gemm_matrix_add";
254     actual   = selected_impl_add->name;
255 
256     ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS);
257 }
258 TEST_SUITE_END() // KERNEL_SELECTION
259 
260 TEST_SUITE(TRANSPOSE_1XW)
261 using CpuGemmTranspose1xW = NESynthetizeFunctionWithZeroConstantKernelBorder<cpu::kernels::CpuGemmTranspose1xWKernel>;
262 DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, zip(
263                    framework::dataset::make("N", { 1, 23, 63, 101 }),
264                    framework::dataset::make("K", { 1, 47, 29, 27 })),
265                n_value, k_value)
266 {
267     bool status = validate_zero_padding<CpuGemmTranspose1xW>(n_value, k_value);
268     ARM_COMPUTE_EXPECT(status, framework::LogLevel::ERRORS);
269 }
270 
271 TEST_SUITE(U32)
272 using CpuGemmTranspose1xWFixture = GEMMTranspose1xWValidationFixture<Tensor, Accessor, CpuGemmTranspose1xW, uint32_t>;
273 FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * framework::dataset::make("DataType", DataType::U32))
274 {
275     // Validate output
276     validate(Accessor(_target), _reference);
277 }
278 TEST_SUITE_END() // U32
279 
280 TEST_SUITE(U16)
281 using CpuGemmTranspose1xWFixture = GEMMTranspose1xWValidationFixture<Tensor, Accessor, CpuGemmTranspose1xW, uint16_t>;
282 FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * framework::dataset::make("DataType", DataType::U16))
283 {
284     // Validate output
285     validate(Accessor(_target), _reference);
286 }
287 TEST_SUITE_END() // U16
288 
289 TEST_SUITE(U8)
290 using CpuGemmTranspose1xWFixture = GEMMTranspose1xWValidationFixture<Tensor, Accessor, CpuGemmTranspose1xW, uint8_t>;
291 FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * framework::dataset::make("DataType", DataType::U8))
292 {
293     // Validate output
294     validate(Accessor(_target), _reference);
295 }
296 TEST_SUITE_END() // U8
297 
298 TEST_SUITE_END() // TRANSPOSE_1XW
299 
300 TEST_SUITE(INTERLEAVE_4X4)
301 using CpuGemmInterleave4x4 = NESynthetizeFunctionWithZeroConstantKernelBorder<cpu::kernels::CpuGemmInterleave4x4Kernel>;
302 
303 DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, zip(
304                    framework::dataset::make("M", { 1, 23, 63, 101 }),
305                    framework::dataset::make("K", { 1, 47, 29, 27 })),
306                m_value, k_value)
307 {
308     bool status = validate_zero_padding<cpu::kernels::CpuGemmInterleave4x4Kernel>(m_value, k_value);
309     ARM_COMPUTE_EXPECT(status, framework::LogLevel::ERRORS);
310 }
311 
312 TEST_SUITE(U32)
313 using CpuGemmInterleave4x4Fixture = GEMMInterleave4x4ValidationFixture<Tensor, Accessor, CpuGemmInterleave4x4, uint32_t>;
314 FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * framework::dataset::make("DataType", DataType::U32))
315 {
316     // Validate output
317     validate(Accessor(_target), _reference);
318 }
319 TEST_SUITE_END() // U32
320 
321 TEST_SUITE(U16)
322 using CpuGemmInterleave4x4Fixture = GEMMInterleave4x4ValidationFixture<Tensor, Accessor, CpuGemmInterleave4x4, uint16_t>;
323 FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * framework::dataset::make("DataType", DataType::U16))
324 {
325     // Validate output
326     validate(Accessor(_target), _reference);
327 }
328 TEST_SUITE_END() // U16
329 
330 TEST_SUITE(U8)
331 using CpuGemmInterleave4x4Fixture = GEMMInterleave4x4ValidationFixture<Tensor, Accessor, CpuGemmInterleave4x4, uint8_t>;
332 FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * framework::dataset::make("DataType", DataType::QASYMM8))
333 {
334     // Validate output
335     validate(Accessor(_target), _reference);
336 }
337 TEST_SUITE_END() // U8
338 
339 TEST_SUITE_END() // INTERLEAVE_4X4
340 
341 template <typename T>
342 using NEGEMMFixture = GEMMValidationFixture<Tensor, Accessor, NEGEMM, T>;
343 
344 template <typename T>
345 using NEBatchedMatMulFixture = GEMMValidationFixture<Tensor, Accessor, NEGEMM, T, true, false, false, false, false, true>;
346 
347 TEST_SUITE(Float)
348 DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, zip(framework::dataset::make("In0", { TensorShape(21U, 13U),
349                                                                                                        TensorShape(31U, 1U),
350                                                                                                        TensorShape(31U, 1U),
351                                                                                                        TensorShape(8U, 2U),
352                                                                                                        TensorShape(38U, 12U),
353                                                                                                        TensorShape(32U, 1U)
354                                                                                                      }),
355                                                                      framework::dataset::make("In1", { TensorShape(33U, 21U),
356                                                                                                        TensorShape(23U, 31U),
357                                                                                                        TensorShape(23U, 31U),
358                                                                                                        TensorShape(16U, 8U),
359                                                                                                        TensorShape(21U, 38U),
360                                                                                                        TensorShape(17U, 32U)
361                                                                                                      })),
362                shape0, shape1)
363 {
364     bool status = validate_gemm_zero_padding(shape0, shape1);
365     ARM_COMPUTE_EXPECT(status, framework::LogLevel::ERRORS);
366 }
367 
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(FP16)
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallGEMMDataset(),
                                                                                                         framework::dataset::make("ReshapeWeights", { true, false })),
                                                                                                 framework::dataset::make("DataType", DataType::F16)))
{
    // Validate output
    validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16);
}

TEST_SUITE(BATCHED_MATMUL)

FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchedMatMulFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallBatchedMatMulDataset(),
                                                                                                                  framework::dataset::make("ReshapeWeights", { false })),
                                                                                                          framework::dataset::make("DataType", DataType::F16)))
{
    // Validate output
    validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16);
}
TEST_SUITE_END() // BATCHED_MATMUL

FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeGEMMDataset(),
                                                                                                       framework::dataset::make("ReshapeWeights", { true, false })),
                                                                                               framework::dataset::make("DataType", DataType::F16)))
{
    // Validate output
    validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16);
}
TEST_SUITE_END() // FP16
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
399 
TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallGEMMDataset(),
                                                                                                          framework::dataset::make("ReshapeWeights", { true, false })),
                                                                                                  framework::dataset::make("DataType", DataType::F32)))
{
    // Validate output
    validate(Accessor(_target), _reference, tolerance_f);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeGEMMDataset(),
                                                                                                        framework::dataset::make("ReshapeWeights", { true, false })),
                                                                                                framework::dataset::make("DataType", DataType::F32)))
{
    // Validate output
    validate(Accessor(_target), _reference, tolerance_f);
}
417 
418 TEST_SUITE(BATCHED_MATMUL)
419 
TEST_SUITE(FP32)420 TEST_SUITE(FP32)
421 FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchedMatMulFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallBatchedMatMulDataset(),
422                                                                                                                    framework::dataset::make("ReshapeWeights", { false })),
423                                                                                                            framework::dataset::make("DataType", DataType::F32)))
424 {
425     // Validate output
426     validate(Accessor(_target), _reference, tolerance_f);
427 }
428 TEST_SUITE_END()
429 
430 TEST_SUITE_END()
431 
432 TEST_SUITE_END()
433 TEST_SUITE_END()
434 
435 TEST_SUITE_END()
436 TEST_SUITE_END()
437 } // namespace validation
438 } // namespace test
439 } // namespace arm_compute
440