xref: /aosp_15_r20/external/ComputeLibrary/tests/validation/NEON/GEMMLowp.cpp (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1 /*
2  * Copyright (c) 2017-2023 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "arm_compute/core/Types.h"
25 #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
26 #include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
27 #include "arm_compute/runtime/Tensor.h"
28 #include "arm_compute/runtime/TensorAllocator.h"
29 #include "src/core/helpers/MemoryHelpers.h"
30 #include "src/cpu/operators/CpuGemmLowpMatrixMultiplyCore.h"
31 #include "tests/NEON/Accessor.h"
32 #include "tests/NEON/Helper.h"
33 #include "tests/PaddingCalculator.h"
34 #include "tests/datasets/GEMMLowpFusedOffsetOutputDataset.h"
35 #include "tests/datasets/LargeGEMMLowpDataset.h"
36 #include "tests/datasets/ShapeDatasets.h"
37 #include "tests/datasets/SmallGEMMLowpDataset.h"
38 #include "tests/framework/Asserts.h"
39 #include "tests/framework/Macros.h"
40 #include "tests/framework/datasets/Datasets.h"
41 #include "tests/validation/Validation.h"
42 #include "tests/validation/fixtures/GEMMLowpFixture.h"
43 
44 namespace arm_compute
45 {
46 namespace test
47 {
48 namespace validation
49 {
50 TEST_SUITE(NEON)
51 TEST_SUITE(GEMMLowp)
52 TEST_SUITE(MatrixMultiplyCore)
53 using NEGEMMLowpMatrixMultiplyCoreFixture = GEMMLowpMatrixMultiplyCoreValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>;
54 using NEGEMMLowpBatchedMatMulFixture      = GEMMLowpMatrixMultiplyCoreValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore, false, false, true>;
55 
/** Configuration check for @ref NEGEMMLowpMatrixMultiplyCore over the small and large GEMMLowp datasets.
 *
 * Checks performed in order:
 * - Freshly created tensors are resizable
 * - After configuring the function, none of the three tensors carries padding
 */
DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL,
               framework::dataset::concat(datasets::SmallGEMMLowpDataset(), datasets::LargeGEMMLowpDataset()),
               shape_a, shape_b, shape_c, a_offset, b_offset)
{
    // Both inputs use the same quantization scale; only the dataset-provided offsets differ
    const float qscale = 1.0f / 255;

    // QASYMM8 inputs, S32 output
    Tensor a = create_tensor<Tensor>(shape_a, DataType::QASYMM8);
    Tensor b = create_tensor<Tensor>(shape_b, DataType::QASYMM8);
    Tensor c = create_tensor<Tensor>(shape_c, DataType::S32);

    a.info()->set_quantization_info(QuantizationInfo(qscale, a_offset));
    b.info()->set_quantization_info(QuantizationInfo(qscale, b_offset));

    ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(c.info()->is_resizable(), framework::LogLevel::ERRORS);

    // Configure without a bias/third input
    NEGEMMLowpMatrixMultiplyCore gemmlowp_mm;
    gemmlowp_mm.configure(&a, &b, nullptr, &c);

    // Every tensor is expected to report a default-constructed (zero) padding
    const PaddingSize no_padding{};
    validate(a.info()->padding(), no_padding);
    validate(b.info()->padding(), no_padding);
    validate(c.info()->padding(), no_padding);
}
80 
81 // *INDENT-OFF*
82 // clang-format off
83 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
84     framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Input not a multiple of 4
85                                              TensorInfo(TensorShape(21U, 13U), 1, DataType::S32),                                 // Mismatching data type
86                                              TensorInfo(TensorShape(20U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Invalid dimensions
87                                              TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Invalid dimensions
88                                              TensorInfo(TensorShape(16U, 32U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)),
89                                           }),
90     framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
91                                             TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
92                                             TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
93                                             TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
94                                             TensorInfo(TensorShape(64U, 16U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
95                                           })),
96     framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(33U, 13U), 1, DataType::S32),
97                                             TensorInfo(TensorShape(33U, 13U), 1, DataType::S32),
98                                             TensorInfo(TensorShape(33U, 13U), 1, DataType::S32),
99                                             TensorInfo(TensorShape(8U, 11U), 1, DataType::S32),
100                                             TensorInfo(TensorShape(64U, 32U), 1, DataType::S32),
101                                            })),
102     framework::dataset::make("Expected", { true, false, false, false, true })),
103     a_info, b_info, output_info, expected)
104 {
105     // Lock tensors
106     Status status =  NEGEMMLowpMatrixMultiplyCore::validate(&a_info.clone()->set_is_resizable(false),
107                                                             &b_info.clone()->set_is_resizable(false),
108                                                             nullptr,
109                                                             &output_info.clone()->set_is_resizable(false));
110     ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
111 }
112 // clang-format on
113 // *INDENT-ON*
114 
115 /** Test case for memory injection in @ref cpu::CpuGemmLowpMatrixMultiplyCore.
116  *
117  * Configure the operator once and inject memory at run-time in multiple executions.
118  *
119  * Checks performed in order:
120  * - Both runs compute the same output
121  */
TEST_CASE(MemoryInjection,framework::DatasetMode::ALL)122 TEST_CASE(MemoryInjection, framework::DatasetMode::ALL)
123 {
124     auto gemm     = std::make_unique<cpu::CpuGemmLowpMatrixMultiplyCore>();
125     auto a_info   = TensorInfo(TensorShape(32U, 72U), 1, DataType::QASYMM8);
126     auto b_info   = TensorInfo(TensorShape(17U, 32U), 1, DataType::QASYMM8);
127     auto dst_info = TensorInfo(TensorShape(17U, 72U), 1, DataType::S32);
128     a_info.set_quantization_info(QuantizationInfo(1.0f / 255, -9));
129     b_info.set_quantization_info(QuantizationInfo(1.0f / 255, 1));
130     const auto gemm_info = GEMMInfo{};
131     gemm->configure(&a_info, &b_info, nullptr, &dst_info, gemm_info);
132 
133     // telhs are newly created every call of this lambda function
134     auto a   = create_tensor<Tensor>(a_info);
135     auto b   = create_tensor<Tensor>(b_info);
136     auto dst = create_tensor<Tensor>(dst_info);
137     a.allocator()->allocate();
138     b.allocator()->allocate();
139     dst.allocator()->allocate();
140 
141     ITensorPack run_pack =
142     {
143         { TensorType::ACL_SRC_0, &a },
144         { TensorType::ACL_SRC_1, &b },
145         { TensorType::ACL_DST, &dst }
146     };
147     ITensorPack prep_pack =
148     {
149         { TensorType::ACL_SRC_1, &b },
150     };
151 
152     auto mg = MemoryGroup{};
153     auto ws = manage_workspace<Tensor>(gemm->workspace(), mg, run_pack, prep_pack);
154 
155     auto run_conv = [&]() -> Tensor
156     {
157         auto dst = create_tensor<Tensor>(dst_info);
158         dst.allocator()->allocate();
159         run_pack.add_tensor(TensorType::ACL_DST, &dst);
160 
161         library->fill_tensor_value(Accessor(a), static_cast<uint8_t>(1));
162         library->fill_tensor_value(Accessor(b), static_cast<uint8_t>(2));
163         // This operator is configured once and captured by this lambda.
164         gemm->prepare(prep_pack);
165         gemm->run(run_pack);
166         return dst;
167     };
168     auto result_0 = run_conv();
169     auto result_1 = run_conv();
170     for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
171     {
172         ARM_COMPUTE_EXPECT(((uint8_t *)result_0.buffer())[i] == ((uint8_t *)result_1.buffer())[i], framework::LogLevel::ERRORS);
173     }
174 }
175 
176 /** Test case for memory injection in @ref NEGEMMLowpMatrixMultiplyCore.
177  *
178  * Make sure @ref NEGEMMLowpMatrixMultiplyCore still works through injecting the memory at configure time using the old API.
179  *
180  * Checks performed in order:
181  * - Both runs compute the same output
182  */
TEST_CASE(MultipleExecutionWithConfigure,framework::DatasetMode::ALL)183 TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL)
184 {
185     auto gemm     = std::make_unique<NEGEMMLowpMatrixMultiplyCore>();
186     auto a_info   = TensorInfo(TensorShape(32U, 72U), 1, DataType::QASYMM8);
187     auto b_info   = TensorInfo(TensorShape(17U, 32U), 1, DataType::QASYMM8);
188     auto dst_info = TensorInfo(TensorShape(17U, 72U), 1, DataType::S32);
189     a_info.set_quantization_info(QuantizationInfo(1.0f / 255, -9));
190     b_info.set_quantization_info(QuantizationInfo(1.0f / 255, 1));
191     const auto gemm_info = GEMMInfo{};
192     auto       run_conv  = [&]()
193     {
194         auto a   = create_tensor<Tensor>(a_info);
195         auto b   = create_tensor<Tensor>(b_info);
196         auto dst = create_tensor<Tensor>(dst_info);
197         gemm->configure(&a, &b, nullptr, &dst, gemm_info);
198         a.allocator()->allocate();
199         b.allocator()->allocate();
200         dst.allocator()->allocate();
201         library->fill_tensor_value(Accessor(a), static_cast<uint8_t>(1));
202         library->fill_tensor_value(Accessor(b), static_cast<uint8_t>(2));
203         gemm->run();
204         return dst;
205     };
206     auto result_0 = run_conv();
207     auto result_1 = run_conv();
208     for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
209     {
210         ARM_COMPUTE_EXPECT(((uint8_t *)result_0.buffer())[i] == ((uint8_t *)result_1.buffer())[i], framework::LogLevel::ERRORS);
211     }
212 }
213 
// Runs the fixture over the small GEMMLowp dataset in every dataset mode.
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFixture, framework::DatasetMode::ALL,
                       datasets::SmallGEMMLowpDataset())
{
    // The fixture's target output must match its reference output
    validate(Accessor(_target), _reference);
}
219 
// Runs the fixture over the large GEMMLowp dataset; registered for the NIGHTLY dataset mode only.
FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFixture, framework::DatasetMode::NIGHTLY,
                       datasets::LargeGEMMLowpDataset())
{
    // The fixture's target output must match its reference output
    validate(Accessor(_target), _reference);
}
225 
226 constexpr AbsoluteTolerance<float> tolerance_batched(1);
227 
228 using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned =
229     GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore, false, false, uint8_t, uint8_t, true>;
230 
231 TEST_SUITE(BatchedMatMul)
TEST_SUITE(QASYMM8)232 TEST_SUITE(QASYMM8)
233 FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned, framework::DatasetMode::ALL,
234                        combine(combine(datasets::SmallGEMMLowpFusedBatchedMatMulDatasetUnsigned(),
235                                        framework::dataset::make("DataType", { DataType::QASYMM8 })),
236                                framework::dataset::make("bool", { false })))
237 {
238     validate(Accessor(_target), _reference, tolerance_batched);
239 }
240 TEST_SUITE_END() // QASYMM8
241 
242 using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned =
243     GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore, false, false, int8_t, int8_t, true>;
244 TEST_SUITE(QASYMM8_SIGNED)
245 FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned, framework::DatasetMode::ALL,
246                        combine(combine(datasets::SmallGEMMLowpFusedBatchedMatMulDatasetSigned(),
247                                        framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED })),
248                                framework::dataset::make("bool", { false })))
249 {
250     validate(Accessor(_target), _reference, tolerance_batched);
251 }
252 TEST_SUITE_END() // QASYMM8_SIGNED
253 TEST_SUITE_END() // BatchedMatMul
254 
255 using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>;
256 constexpr AbsoluteTolerance<float> tolerance_quant(1);
257 
258 TEST_SUITE(FusedOffsetOutput)
259 FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedOffsetOutputUint8Dataset(),
260                        framework::dataset::make("DataType", { DataType::QASYMM8 })))
261 {
262     // Validate output
263     validate(Accessor(_target), _reference, tolerance_quant);
264 }
265 
266 FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeGEMMLowpFusedOffsetOutputUint8Dataset(),
267                        framework::dataset::make("DataType", { DataType::QASYMM8 })))
268 {
269     // Validate output
270     validate(Accessor(_target), _reference, tolerance_quant);
271 }
272 TEST_SUITE_END() // FusedOffsetOutput
273 TEST_SUITE_END() // MatrixMultiplyCore
274 TEST_SUITE_END() // GEMMLowp
275 TEST_SUITE_END() // NEON
276 } // namespace validation
277 } // namespace test
278 } // namespace arm_compute
279