1 /*
2 * Copyright (c) 2017-2023 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24 #include "arm_compute/core/Helpers.h"
25 #include "arm_compute/core/Types.h"
26 #include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
27 #include "arm_compute/runtime/Tensor.h"
28 #include "arm_compute/runtime/TensorAllocator.h"
29 #include "src/common/cpuinfo/CpuIsaInfo.h"
30 #include "src/cpu/kernels/CpuDirectConv2dKernel.h"
31 #include "tests/NEON/Accessor.h"
32 #include "tests/PaddingCalculator.h"
33 #include "tests/datasets/ShapeDatasets.h"
34 #include "tests/framework/Asserts.h"
35 #include "tests/framework/Macros.h"
36 #include "tests/framework/datasets/Datasets.h"
37 #include "tests/validation/Validation.h"
38 #include "tests/validation/fixtures/DirectConvolutionLayerFixture.h"
39
40 namespace arm_compute
41 {
42 namespace test
43 {
44 namespace validation
45 {
namespace
{
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
const RelativeTolerance<half_float::half> rel_tolerance_f16(half_float::half(0.2f)); /**< Relative tolerance value for FP16 types */
const AbsoluteTolerance<float>            abs_tolerance_f16(0.2f);                   /**< Absolute tolerance for FP16 types */
constexpr float                           tolerance_num = 0.07f;                     /**< Tolerance number for the FP16 implementation */
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for floating point tests */

/** Direct convolution padding/kernel-size data set for FP32:
 *  3x3 kernels with pads up to 2 and a 5x5 kernel with pads up to 3. */
const auto data_pad_f32 = concat(concat(combine(framework::dataset::make("PadX", { 0, 1 }),
                                                combine(framework::dataset::make("PadY", { 0, 1 }),
                                                        framework::dataset::make("KernelSize", 3))),
                                        combine(framework::dataset::make("PadX", { 0, 2 }),
                                                combine(framework::dataset::make("PadY", { 0, 2 }),
                                                        framework::dataset::make("KernelSize", 3)))),
                                 combine(framework::dataset::make("PadX", { 0, 3 }),
                                         combine(framework::dataset::make("PadY", { 0, 3 }),
                                                 framework::dataset::make("KernelSize", 5))));

/** Direct convolution padding/kernel-size data set for FP16:
 *  3x3 kernels with pads up to 1 and an unpadded 1x1 kernel. */
const auto data_pad_f16 = concat(combine(framework::dataset::make("PadX", { 0, 1 }),
                                         combine(framework::dataset::make("PadY", { 0, 1 }),
                                                 framework::dataset::make("KernelSize", 3))),
                                 combine(framework::dataset::make("PadX", { 0 }),
                                         combine(framework::dataset::make("PadY", { 0 }),
                                                 framework::dataset::make("KernelSize", 1))));

/** FP32 data set: small shapes with strides 1..4 in both directions. */
const auto data_f32 = combine(datasets::SmallDirectConvolutionShapes(),
                              combine(framework::dataset::make("StrideX", { 1, 2, 3, 4 }),
                                      combine(framework::dataset::make("StrideY", { 1, 2, 3, 4 }),
                                              data_pad_f32)));

/** FP16 data set: small shapes with strides 1..3 in both directions. */
const auto data_f16 = combine(datasets::SmallDirectConvolutionShapes(),
                              combine(framework::dataset::make("StrideX", { 1, 2, 3 }),
                                      combine(framework::dataset::make("StrideY", { 1, 2, 3 }),
                                              data_pad_f16)));

/** Minimal precommit data set: unit stride, 3x3 kernel, pad 1. */
const auto data_prec = combine(datasets::SmallDirectConvolutionShapes(),
                               combine(framework::dataset::make("StrideX", { 1 }),
                                       combine(framework::dataset::make("StrideY", { 1 }),
                                               combine(framework::dataset::make("PadX", { 1 }),
                                                       combine(framework::dataset::make("PadY", { 1 }),
                                                               framework::dataset::make("KernelSize", 3))))));

/** 9x9 kernel data set. */
const auto data9x9 = combine(datasets::SmallDirectConvolutionShapes(),
                             combine(framework::dataset::make("StrideX", { 1, 2, 3 }),
                                     combine(framework::dataset::make("StrideY", { 1, 2, 3 }),
                                             combine(framework::dataset::make("PadX", { 0, 2 }),
                                                     combine(framework::dataset::make("PadY", { 0, 3 }),
                                                             framework::dataset::make("KernelSize", 9))))));

/** 8x8 kernel data set (no padding). */
const auto data8x8 = combine(datasets::SmallDirectConvolutionShapes(),
                             combine(framework::dataset::make("StrideX", { 1, 2, 3 }),
                                     combine(framework::dataset::make("StrideY", { 1, 2, 3 }),
                                             combine(framework::dataset::make("PadX", { 0 }),
                                                     combine(framework::dataset::make("PadY", { 0 }),
                                                             framework::dataset::make("KernelSize", 8))))));

// Nightly runs sweep over several output-channel counts; precommit keeps one per data set.
const auto data_f32_nightly = combine(data_f32, framework::dataset::make("NumKernels", { 1, 4, 5 }));
const auto data_f16_nightly = combine(data_f16, framework::dataset::make("NumKernels", { 1, 4, 5 }));

const auto data_precommit    = combine(data_prec, framework::dataset::make("NumKernels", { 1 }));
const auto data_precommit9x9 = combine(data9x9, framework::dataset::make("NumKernels", { 4 }));
const auto data_precommit8x8 = combine(data8x8, framework::dataset::make("NumKernels", { 4 }));

/* The following test is from a real use-case that made DirectConvolution
 * overflow in terms of its tensor indexing. This test case is using
 * a separate tolerance due to the following reasons.
 * - It has shown that it requires generally larger absolute tolerance
 *   for large numbers or larger relative tolerance for small numbers.
 * - With the first reason, since it is mainly testing index overflow,
 *   a value with a margin is used to avoid unintended test failures
 *   during nightly.
 */
constexpr AbsoluteTolerance<float> usecase_tolerance_fp32(0.05f);

/** Large 800x800 input with a 9x9 kernel exercising the index-overflow use case. */
const auto data_nightly_usecase = combine(framework::dataset::make("InputShape", { TensorShape{ 3U, 800U, 800U } }),
                                          combine(framework::dataset::make("StrideX", { 1 }),
                                                  combine(framework::dataset::make("StrideY", { 1 }),
                                                          combine(framework::dataset::make("PadX", { 4 }),
                                                                  combine(framework::dataset::make("PadY", { 4 }),
                                                                          combine(framework::dataset::make("KernelSize", 9),
                                                                                  framework::dataset::make("NumKernels", { 16 })))))));

/** Activation function Dataset */
const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
{
    ActivationLayerInfo(),
    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f)
});
} // namespace
138
139 TEST_SUITE(NEON)
TEST_SUITE(DirectConvolutionLayer)140 TEST_SUITE(DirectConvolutionLayer)
141
142 /** Check whether the configuration of a Direct Convolution layer with no
143 * bias leads to a successful execution.
144 */
145 TEST_CASE(NoBias, framework::DatasetMode::PRECOMMIT)
146 {
147 const auto src_shape = TensorShape(27U, 13U, 2U);
148 const auto weights_shape = TensorShape(3U, 3U, 2U, 4U);
149 const auto bias_shape = TensorShape(4U);
150 const auto dst_shape = TensorShape(25U, 11U, 4U);
151 constexpr auto dt = DataType::F32;
152
153 auto src = create_tensor<Tensor>(src_shape, dt);
154 auto weights = create_tensor<Tensor>(weights_shape, dt);
155 auto dst = create_tensor<Tensor>(dst_shape, dt);
156
157 const auto conv_info = PadStrideInfo(1, 1, 0, 0);
158
159 // Create Direct Convolution function
160 NEDirectConvolutionLayer conv{};
161 conv.configure(&src, &weights, nullptr, &dst, conv_info);
162
163 src.allocator()->allocate();
164 weights.allocator()->allocate();
165 dst.allocator()->allocate();
166
167 library->fill_tensor_value(Accessor(src), 1.f);
168 library->fill_tensor_value(Accessor(weights), 1.f);
169
170 conv.run();
171
172 // Compute reference to compare
173 SimpleTensor<float> ref_src{ src_shape, dt };
174 SimpleTensor<float> ref_weights{ weights_shape, dt };
175 SimpleTensor<float> ref_bias{ bias_shape, dt };
176 library->fill_tensor_value(ref_src, 1.f);
177 library->fill_tensor_value(ref_weights, 1.f);
178 // No bias
179 library->fill_tensor_value(ref_bias, 0.f);
180 auto ref_dst = reference::convolution_layer<float>(ref_src, ref_weights, ref_bias, dst_shape, conv_info);
181
182 validate(Accessor(dst), ref_dst);
183 }
184
185 DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL,
186 concat(combine(combine(framework::dataset::make("CpuExt", std::string("NEON")),
187 framework::dataset::make("DataType", { DataType::F32 })),
188 framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
189 combine(combine(framework::dataset::make("CpuExt", std::string("NEON")),
190 framework::dataset::make("DataType", { DataType::F16 })),
191 framework::dataset::make("DataLayout", { DataLayout::NCHW }))),
192 cpu_ext, data_type, data_layout)
193 {
194 using namespace cpu::kernels;
195
196 cpuinfo::CpuIsaInfo cpu_isa{};
197 cpu_isa.neon = (cpu_ext == "NEON");
198 cpu_isa.fp16 = (data_type == DataType::F16);
199
200 const auto *selected_impl = CpuDirectConv2dKernel::get_implementation(DataTypeDataLayoutISASelectorData{ data_type, data_layout, cpu_isa }, cpu::KernelSelectionType::Preferred);
201
202 ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl);
203
204 std::string data_layout_str;
205 if(data_layout == DataLayout::NCHW)
206 {
207 data_layout_str = "nchw";
208 }
209 else
210 {
211 data_layout_str = "nhwc";
212 }
213
214 std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_" + data_layout_str + "_directconv2d";
215 std::string actual = selected_impl->name;
216
217 ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS);
218 }
219
// *INDENT-OFF*
// clang-format off
/** Negative tests for NEDirectConvolutionLayer::validate().
 *  The six zipped datasets are positionally aligned: row i of each dataset
 *  together describes one invalid configuration (see the per-row comments on
 *  InputInfo), and validate() is expected to reject every row ("Expected" is
 *  false for all of them). */
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
        framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid: Mismatching data type input/weights
                                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid: Mismatching input feature maps
                                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Unsupported kernel width
                                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Unsupported non-rectangular weights dimensions
                                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid weights dimensions
                                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Unsupported stride
                                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Unsupported biases size
                                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Unsupported biases dimensions
                                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid output size
                                              }),
        framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F16),
                                                 TensorInfo(TensorShape(3U, 3U, 3U, 4U), 1, DataType::F32),
                                                 TensorInfo(TensorShape(9U, 9U, 2U, 4U), 1, DataType::F32),
                                                 TensorInfo(TensorShape(5U, 3U, 2U, 4U), 1, DataType::F32),
                                                 TensorInfo(TensorShape(3U, 3U, 2U, 4U, 3U), 1, DataType::F32),
                                                 TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
                                                 TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
                                                 TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
                                                 TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
                                               })),
        framework::dataset::make("BiasesInfo",{ TensorInfo(TensorShape(4U), 1, DataType::F32),
                                                TensorInfo(TensorShape(4U), 1, DataType::F32),
                                                TensorInfo(TensorShape(4U), 1, DataType::F32),
                                                TensorInfo(TensorShape(4U), 1, DataType::F32),
                                                TensorInfo(TensorShape(4U), 1, DataType::F32),
                                                TensorInfo(TensorShape(4U), 1, DataType::F32),
                                                TensorInfo(TensorShape(3U), 1, DataType::F32),
                                                TensorInfo(TensorShape(4U, 2U), 1, DataType::F32),
                                                TensorInfo(TensorShape(4U), 1, DataType::F32),
                                              })),
        framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
                                                TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
                                                TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
                                                TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
                                                TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
                                                TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
                                                TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
                                                TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
                                                TensorInfo(TensorShape(26U, 11U, 4U), 1, DataType::F32),
                                              })),
        framework::dataset::make("ConvInfo",  { PadStrideInfo(1, 1, 0, 0),
                                                PadStrideInfo(1, 1, 0, 0),
                                                PadStrideInfo(1, 1, 0, 0),
                                                PadStrideInfo(1, 1, 0, 0),
                                                PadStrideInfo(1, 1, 0, 0),
                                                PadStrideInfo(3, 3, 0, 0),
                                                PadStrideInfo(1, 1, 0, 0),
                                                PadStrideInfo(1, 1, 0, 0),
                                                PadStrideInfo(1, 1, 0, 0),
                                              })),
        framework::dataset::make("ActivationInfo",
        {
            ActivationLayerInfo(),
            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
            ActivationLayerInfo(),
            ActivationLayerInfo(),
            ActivationLayerInfo(),
            ActivationLayerInfo(),
            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
        })),
        framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, false })),
        input_info, weights_info, biases_info, output_info, conv_info, act_info, expected)
{
    // validate() must reject each configuration above
    bool is_valid = bool(NEDirectConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info, act_info));
    ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
}
// clang-format on
// *INDENT-ON*
293
294 DATA_TEST_CASE(NoPaddingNHWCKernel, framework::DatasetMode::ALL, combine(combine(combine(data_precommit,
295 framework::dataset::make("DataType", DataType::F32)),
296 ActivationFunctionsDataset),
297 framework::dataset::make("DataLayout", { DataLayout::NHWC })),
298
299 shape, stride_x, stride_y, pad_x, pad_y, kernel_size, num_kernels, data_type, act_info, data_layout)
300 {
301 TensorShape input_shape = TensorShape(shape);
302 TensorShape weights_shape(kernel_size, kernel_size, input_shape.z(), num_kernels);
303 const PadStrideInfo info(stride_x, stride_y, pad_x, pad_y, DimensionRoundingType::FLOOR);
304
305 TensorInfo input_info = TensorInfo(input_shape, 1, data_type);
306 TensorInfo weights_info = TensorInfo(weights_shape, 1, data_type);
307
308 TensorShape output_shape = compute_deep_convolution_shape(input_info, weights_info, info);
309
310 if(data_layout == DataLayout::NHWC)
311 {
312 permute(input_shape, PermutationVector(2U, 0U, 1U));
313 permute(weights_shape, PermutationVector(2U, 0U, 1U));
314 permute(output_shape, PermutationVector(2U, 0U, 1U));
315 }
316
317 // Create tensors
318 Tensor src = create_tensor<Tensor>(input_shape, data_type, 1, QuantizationInfo(), data_layout);
319 Tensor weights = create_tensor<Tensor>(weights_shape, data_type, 1, QuantizationInfo(), data_layout);
320 Tensor dst = create_tensor<Tensor>(output_shape, data_type, 1, QuantizationInfo(), data_layout);
321
322 // Create and configure function
323 NEDirectConvolutionLayer conv;
324 conv.configure(&src, &weights, nullptr, &dst, info, act_info);
325
326 validate(src.info()->padding(), PaddingSize(0, 0, 0, 0));
327 validate(weights.info()->padding(), PaddingSize(0, 0, 0, 0));
328 validate(dst.info()->padding(), PaddingSize(0, 0, 0, 0));
329 }
330
/** Standard validation fixture for NEDirectConvolutionLayer. */
template <typename T>
using NEDirectConvolutionLayerFixture = DirectConvolutionValidationFixture<Tensor, Accessor, NEDirectConvolutionLayer, T>;
/** Same fixture with the extra boolean template argument set to true —
 *  presumably enables mixed data-layout runs (see DirectConvolutionLayerFixture.h to confirm). */
template <typename T>
using NEDirectConvolutionLayerMixedDataLayoutFixture = DirectConvolutionValidationFixture<Tensor, Accessor, NEDirectConvolutionLayer, T, true>;
335
336 TEST_SUITE(Float)
337 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(FP16)338 TEST_SUITE(FP16)
339 FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data_precommit, framework::dataset::make("DataType",
340 DataType::F16)),
341 ActivationFunctionsDataset),
342 framework::dataset::make("DataLayout", DataLayout::NCHW)))
343 {
344 // Validate output
345 validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16);
346 }
/** Nightly FP16 run over the full stride/pad sweep, NCHW only. */
FIXTURE_DATA_TEST_CASE(RunLarge, NEDirectConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data_f16_nightly, framework::dataset::make("DataType", DataType::F16)),
                                                                                                                 ActivationFunctionsDataset),
                                                                                                                 framework::dataset::make("DataLayout", DataLayout::NCHW)))
{
    // Validate output with the FP16 tolerances
    validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16);
}
TEST_SUITE_END() // FP16
#endif           /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
356
// (Fixes a garbled, duplicated TEST_SUITE(FP32) line.)
TEST_SUITE(FP32)
/** Precommit FP32 run over the small data set in both data layouts. */
FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data_precommit, framework::dataset::make("DataType",
                                                                                                                                    DataType::F32)),
                                                                                                                    ActivationFunctionsDataset),
                                                                                                                    framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, tolerance_fp32);
}
/** Precommit FP32 run using the mixed data-layout fixture. */
FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEDirectConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data_precommit,
                                                                                                                                             framework::dataset::make("DataType", DataType::F32)),
                                                                                                                                             ActivationFunctionsDataset),
                                                                                                                                             framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, tolerance_fp32);
}

/** Precommit FP32 run with 8x8 kernels (no padding). */
FIXTURE_DATA_TEST_CASE(RunSmall8x8, NEDirectConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data_precommit8x8, framework::dataset::make("DataType",
                                                                                                                               DataType::F32)),
                                                                                                                       ActivationFunctionsDataset),
                                                                                                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, tolerance_fp32);
}

/** Precommit FP32 run with 9x9 kernels, NHWC only. */
FIXTURE_DATA_TEST_CASE(RunSmall9x9, NEDirectConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data_precommit9x9, framework::dataset::make("DataType",
                                                                                                                               DataType::F32)),
                                                                                                                       ActivationFunctionsDataset),
                                                                                                                       framework::dataset::make("DataLayout", { DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, tolerance_fp32);
}
/** Nightly FP32 run over the full stride/pad sweep in both data layouts. */
FIXTURE_DATA_TEST_CASE(RunLarge, NEDirectConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data_f32_nightly, framework::dataset::make("DataType",
                                                                                                                          DataType::F32)),
                                                                                                                  ActivationFunctionsDataset),
                                                                                                                  framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, tolerance_fp32);
}
/** Nightly FP32 run of the large-input index-overflow use case (uses its own tolerance). */
FIXTURE_DATA_TEST_CASE(RunLargeUsecase, NEDirectConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data_nightly_usecase, framework::dataset::make("DataType",
                                                                                                                                 DataType::F32)),
                                                                                                                         framework::dataset::make("ActivationInfo", { ActivationLayerInfo() })),
                                                                                                                         framework::dataset::make("DataLayout", { DataLayout::NHWC })))
{
    // Validate output with the dedicated use-case tolerance
    validate(Accessor(_target), _reference, usecase_tolerance_fp32);
}
TEST_SUITE_END() // FP32
TEST_SUITE_END() // Float
TEST_SUITE_END() // DirectConvolutionLayer
TEST_SUITE_END() // NEON
412 } // namespace validation
413 } // namespace test
414 } // namespace arm_compute
415