/*
 * Copyright (c) 2017-2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"
#include "src/core/helpers/MemoryHelpers.h"
#include "src/cpu/operators/CpuGemmLowpMatrixMultiplyCore.h"
#include "tests/NEON/Accessor.h"
#include "tests/NEON/Helper.h"
#include "tests/PaddingCalculator.h"
#include "tests/datasets/GEMMLowpFusedOffsetOutputDataset.h"
#include "tests/datasets/LargeGEMMLowpDataset.h"
#include "tests/datasets/ShapeDatasets.h"
#include "tests/datasets/SmallGEMMLowpDataset.h"
#include "tests/framework/Asserts.h"
#include "tests/framework/Macros.h"
#include "tests/framework/datasets/Datasets.h"
#include "tests/validation/Validation.h"
#include "tests/validation/fixtures/GEMMLowpFixture.h"

namespace arm_compute
{
namespace test
{
namespace validation
{
TEST_SUITE(NEON)
TEST_SUITE(GEMMLowp)
TEST_SUITE(MatrixMultiplyCore)
using NEGEMMLowpMatrixMultiplyCoreFixture = GEMMLowpMatrixMultiplyCoreValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>;
using NEGEMMLowpBatchedMatMulFixture      = GEMMLowpMatrixMultiplyCoreValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore, false, false, true>;

DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, framework::dataset::concat(datasets::SmallGEMMLowpDataset(), datasets::LargeGEMMLowpDataset()),
               shape_a, shape_b, shape_c, a_offset, b_offset)
{
    // Create tensors
    Tensor a = create_tensor<Tensor>(shape_a, DataType::QASYMM8);
    Tensor b = create_tensor<Tensor>(shape_b, DataType::QASYMM8);
    Tensor c = create_tensor<Tensor>(shape_c, DataType::S32);

    a.info()->set_quantization_info(QuantizationInfo(1.0f / 255, a_offset));
    b.info()->set_quantization_info(QuantizationInfo(1.0f / 255, b_offset));

    ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(c.info()->is_resizable(), framework::LogLevel::ERRORS);

    // Create and configure function
    NEGEMMLowpMatrixMultiplyCore gemmlowp_mm;
    gemmlowp_mm.configure(&a, &b, nullptr, &c);

    // Validate padding is zero
    validate(a.info()->padding(), PaddingSize());
    validate(b.info()->padding(), PaddingSize());
    validate(c.info()->padding(), PaddingSize());
}

// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
    framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Input not a multiple of 4
                                             TensorInfo(TensorShape(21U, 13U), 1, DataType::S32),                                    // Mismatching data type
                                             TensorInfo(TensorShape(20U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Invalid dimensions
                                             TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Invalid dimensions
                                             TensorInfo(TensorShape(16U, 32U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)),
                                           }),
    framework::dataset::make("InputBInfo", { TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
                                             TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
                                             TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
                                             TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
                                             TensorInfo(TensorShape(64U, 16U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
                                           })),
    framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(33U, 13U), 1, DataType::S32),
                                             TensorInfo(TensorShape(33U, 13U), 1, DataType::S32),
                                             TensorInfo(TensorShape(33U, 13U), 1, DataType::S32),
                                             TensorInfo(TensorShape(8U, 11U), 1, DataType::S32),
                                             TensorInfo(TensorShape(64U, 32U), 1, DataType::S32),
                                           })),
    framework::dataset::make("Expected", { true, false, false, false, true })),
    a_info, b_info, output_info, expected)
{
    // Lock tensors
    Status status = NEGEMMLowpMatrixMultiplyCore::validate(&a_info.clone()->set_is_resizable(false),
                                                           &b_info.clone()->set_is_resizable(false),
                                                           nullptr,
                                                           &output_info.clone()->set_is_resizable(false));
    ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
}
// clang-format on
// *INDENT-ON*

/** Test case for memory injection in @ref cpu::CpuGemmLowpMatrixMultiplyCore.
 *
 * Configure the operator once and inject memory at run-time in multiple executions.
 *
 * Checks performed in order:
 * - Both runs compute the same output
 */
TEST_CASE(MemoryInjection, framework::DatasetMode::ALL)
{
    auto gemm     = std::make_unique<cpu::CpuGemmLowpMatrixMultiplyCore>();
    auto a_info   = TensorInfo(TensorShape(32U, 72U), 1, DataType::QASYMM8);
    auto b_info   = TensorInfo(TensorShape(17U, 32U), 1, DataType::QASYMM8);
    auto dst_info = TensorInfo(TensorShape(17U, 72U), 1, DataType::S32);
    a_info.set_quantization_info(QuantizationInfo(1.0f / 255, -9));
    b_info.set_quantization_info(QuantizationInfo(1.0f / 255, 1));
    const auto gemm_info = GEMMInfo{};
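    // The operator API is configured on TensorInfo objects only; no backing memory is bound at this point.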
    gemm->configure(&a_info, &b_info, nullptr, &dst_info, gemm_info);

    // Create the input/output tensors and allocate their backing memory
    auto a   = create_tensor<Tensor>(a_info);
    auto b   = create_tensor<Tensor>(b_info);
    auto dst = create_tensor<Tensor>(dst_info);
    a.allocator()->allocate();
    b.allocator()->allocate();
    dst.allocator()->allocate();

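    // The run pack carries all of the operator's I/O tensors; the prepare pack only needs the
    // constant rhs input, which prepare() uses for its one-time transformations.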
    ITensorPack run_pack =
    {
        { TensorType::ACL_SRC_0, &a },
        { TensorType::ACL_SRC_1, &b },
        { TensorType::ACL_DST, &dst }
    };
    ITensorPack prep_pack =
    {
        { TensorType::ACL_SRC_1, &b },
    };

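    // Allocate the auxiliary workspace tensors the operator requested and register them in both packs.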
    auto mg = MemoryGroup{};
    auto ws = manage_workspace<Tensor>(gemm->workspace(), mg, run_pack, prep_pack);

    auto run_conv = [&]() -> Tensor
    {
        auto dst = create_tensor<Tensor>(dst_info);
        dst.allocator()->allocate();
        run_pack.add_tensor(TensorType::ACL_DST, &dst);

        library->fill_tensor_value(Accessor(a), static_cast<uint8_t>(1));
        library->fill_tensor_value(Accessor(b), static_cast<uint8_t>(2));
        // This operator is configured once and captured by this lambda.
        gemm->prepare(prep_pack);
        gemm->run(run_pack);
        return dst;
    };
    auto result_0 = run_conv();
    auto result_1 = run_conv();
    // The destination is S32, so compare whole elements; a uint8_t view indexed by the element
    // count would only cover a quarter of the buffer.
    for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
    {
        ARM_COMPUTE_EXPECT(reinterpret_cast<int32_t *>(result_0.buffer())[i] == reinterpret_cast<int32_t *>(result_1.buffer())[i], framework::LogLevel::ERRORS);
    }
}

/** Test case for memory injection in @ref NEGEMMLowpMatrixMultiplyCore.
 *
 * Make sure @ref NEGEMMLowpMatrixMultiplyCore still works through injecting the memory at configure time using the old API.
 *
 * Checks performed in order:
 * - Both runs compute the same output
 */
TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL)
{
    auto gemm     = std::make_unique<NEGEMMLowpMatrixMultiplyCore>();
    auto a_info   = TensorInfo(TensorShape(32U, 72U), 1, DataType::QASYMM8);
    auto b_info   = TensorInfo(TensorShape(17U, 32U), 1, DataType::QASYMM8);
    auto dst_info = TensorInfo(TensorShape(17U, 72U), 1, DataType::S32);
    a_info.set_quantization_info(QuantizationInfo(1.0f / 255, -9));
    b_info.set_quantization_info(QuantizationInfo(1.0f / 255, 1));
    const auto gemm_info = GEMMInfo{};
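    // Unlike the operator API above, the legacy run-time function manages its own working memory,
    // so it can simply be reconfigured and executed again without explicit workspace handling.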
    auto run_conv = [&]()
    {
        auto a   = create_tensor<Tensor>(a_info);
        auto b   = create_tensor<Tensor>(b_info);
        auto dst = create_tensor<Tensor>(dst_info);
        gemm->configure(&a, &b, nullptr, &dst, gemm_info);
        a.allocator()->allocate();
        b.allocator()->allocate();
        dst.allocator()->allocate();
        library->fill_tensor_value(Accessor(a), static_cast<uint8_t>(1));
        library->fill_tensor_value(Accessor(b), static_cast<uint8_t>(2));
        gemm->run();
        return dst;
    };
    auto result_0 = run_conv();
    auto result_1 = run_conv();
    // As above, compare the S32 output element by element.
    for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
    {
        ARM_COMPUTE_EXPECT(reinterpret_cast<int32_t *>(result_0.buffer())[i] == reinterpret_cast<int32_t *>(result_1.buffer())[i], framework::LogLevel::ERRORS);
    }
}

FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpDataset())
{
    // Validate output
    validate(Accessor(_target), _reference);
}

FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFixture, framework::DatasetMode::NIGHTLY, datasets::LargeGEMMLowpDataset())
{
    // Validate output
    validate(Accessor(_target), _reference);
}

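// The fused output stage requantizes the S32 accumulators, so allow a deviation of one quantized step.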
constexpr AbsoluteTolerance<float> tolerance_batched(1);

using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned =
    GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore, false, false, uint8_t, uint8_t, true>;

TEST_SUITE(BatchedMatMul)
TEST_SUITE(QASYMM8)
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned, framework::DatasetMode::ALL,
                       combine(combine(datasets::SmallGEMMLowpFusedBatchedMatMulDatasetUnsigned(),
                                       framework::dataset::make("DataType", { DataType::QASYMM8 })),
                               framework::dataset::make("bool", { false })))
{
    validate(Accessor(_target), _reference, tolerance_batched);
}
TEST_SUITE_END() // QASYMM8

using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned =
    GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore, false, false, int8_t, int8_t, true>;
TEST_SUITE(QASYMM8_SIGNED)
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned, framework::DatasetMode::ALL,
                       combine(combine(datasets::SmallGEMMLowpFusedBatchedMatMulDatasetSigned(),
                                       framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED })),
                               framework::dataset::make("bool", { false })))
{
    validate(Accessor(_target), _reference, tolerance_batched);
}
TEST_SUITE_END() // QASYMM8_SIGNED
TEST_SUITE_END() // BatchedMatMul

using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>;
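// As above, permit one quantized step of difference for the fused requantization path.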
constexpr AbsoluteTolerance<float> tolerance_quant(1);

TEST_SUITE(FusedOffsetOutput)
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedOffsetOutputUint8Dataset(),
                       framework::dataset::make("DataType", { DataType::QASYMM8 })))
{
    // Validate output
    validate(Accessor(_target), _reference, tolerance_quant);
}

FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeGEMMLowpFusedOffsetOutputUint8Dataset(),
                       framework::dataset::make("DataType", { DataType::QASYMM8 })))
{
    // Validate output
    validate(Accessor(_target), _reference, tolerance_quant);
}
TEST_SUITE_END() // FusedOffsetOutput
TEST_SUITE_END() // MatrixMultiplyCore
TEST_SUITE_END() // GEMMLowp
TEST_SUITE_END() // NEON
} // namespace validation
} // namespace test
} // namespace arm_compute