validation/NEON/ConvolutionLayer.cpp

*c217d954SCole Faust/*
*c217d954SCole Faust * Copyright (c) 2017-2023 Arm Limited.
*c217d954SCole Faust *
*c217d954SCole Faust * SPDX-License-Identifier: MIT
*c217d954SCole Faust *
*c217d954SCole Faust * Permission is hereby granted, free of charge, to any person obtaining a copy
*c217d954SCole Faust * of this software and associated documentation files (the "Software"), to
*c217d954SCole Faust * deal in the Software without restriction, including without limitation the
*c217d954SCole Faust * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
*c217d954SCole Faust * sell copies of the Software, and to permit persons to whom the Software is
*c217d954SCole Faust * furnished to do so, subject to the following conditions:
*c217d954SCole Faust *
*c217d954SCole Faust * The above copyright notice and this permission notice shall be included in all
*c217d954SCole Faust * copies or substantial portions of the Software.
*c217d954SCole Faust *
*c217d954SCole Faust * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
*c217d954SCole Faust * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
*c217d954SCole Faust * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
*c217d954SCole Faust * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
*c217d954SCole Faust * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
*c217d954SCole Faust * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
*c217d954SCole Faust * SOFTWARE.
*c217d954SCole Faust */
*c217d954SCole Faust#include "arm_compute/core/Types.h"
*c217d954SCole Faust#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
*c217d954SCole Faust#include "arm_compute/runtime/NEON/functions/NEGEMMConv2d.h"
*c217d954SCole Faust#include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h"
*c217d954SCole Faust#include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h"
*c217d954SCole Faust#include "arm_compute/runtime/Tensor.h"
*c217d954SCole Faust#include "arm_compute/runtime/TensorAllocator.h"
*c217d954SCole Faust#include "src/core/helpers/MemoryHelpers.h"
*c217d954SCole Faust#include "src/cpu/operators/CpuGemmConv2d.h"
*c217d954SCole Faust#include "src/cpu/operators/CpuGemmDirectConv2d.h"
*c217d954SCole Faust#include "src/cpu/operators/CpuWinogradConv2d.h"
*c217d954SCole Faust#include "tests/NEON/Accessor.h"
*c217d954SCole Faust#include "tests/PaddingCalculator.h"
*c217d954SCole Faust#include "tests/datasets/LargeConvolutionLayerDataset.h"
*c217d954SCole Faust#include "tests/datasets/SmallConvolutionLayerDataset.h"
*c217d954SCole Faust#include "tests/datasets/TinyConvolutionLayerDataset.h"
*c217d954SCole Faust#include "tests/framework/Asserts.h"
*c217d954SCole Faust#include "tests/framework/Macros.h"
*c217d954SCole Faust#include "tests/framework/datasets/Datasets.h"
*c217d954SCole Faust#include "tests/validation/Validation.h"
*c217d954SCole Faust#include "tests/validation/fixtures/ConvolutionLayerFixture.h"
*c217d954SCole Faust#include "tests/validation/fixtures/WinogradConvolutionLayerFixture.h"
*c217d954SCole Faust
*c217d954SCole Faustnamespace arm_compute
*c217d954SCole Faust{
*c217d954SCole Faustnamespace test
*c217d954SCole Faust{
*c217d954SCole Faustnamespace validation
*c217d954SCole Faust{
*c217d954SCole Faustnamespace detail
*c217d954SCole Faust{
*c217d954SCole Fausttemplate <>
*c217d954SCole Faustvoid configure_conv_function<NEGEMMConv2d, Tensor>(NEGEMMConv2d &func,
*c217d954SCole Faust                                                   Tensor *src, const Tensor *weights, const Tensor *bias, Tensor *dst,
*c217d954SCole Faust                                                   const PadStrideInfo &info, const WeightsInfo &weights_info,
*c217d954SCole Faust                                                   const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
*c217d954SCole Faust{
*c217d954SCole Faust    ARM_COMPUTE_UNUSED(weights_info);
*c217d954SCole Faust
*c217d954SCole Faust    Conv2dInfo conv_info(info, dilation, act_info, false, num_groups);
*c217d954SCole Faust    func.configure(src, weights, bias, dst, conv_info);
*c217d954SCole Faust}
*c217d954SCole Faust} // namespace detail
*c217d954SCole Faustnamespace
*c217d954SCole Faust{
*c217d954SCole Faustconst RelativeTolerance<float> rel_tolerance_f32(0.01f);              /**< Relative tolerance for FP32 types */
*c217d954SCole Faustconst RelativeTolerance<float> rel_tolerance_winograd_3x3_f32(0.05f); /**< Relative tolerance for FP32 types */
*c217d954SCole Faustconst AbsoluteTolerance<float> abs_tolerance_f32(0.002f);             /**< Absolute tolerance for FP32 types */
*c217d954SCole Faustconst AbsoluteTolerance<float> abs_tolerance_1xN_f32(0.0041f);        /**< Absolute tolerance for FP32 types */
*c217d954SCole Faust
*c217d954SCole Faust#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
*c217d954SCole Faustconst AbsoluteTolerance<half> tolerance_convolution_layer_f16(half(0.4f));
*c217d954SCole Faustconstexpr float               tolerance_num_f16 = 0.15f;
*c217d954SCole Faust#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
*c217d954SCole Faust
*c217d954SCole Faust#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
*c217d954SCole Faustconst RelativeTolerance<half_float::half> rel_tolerance_f16(half_float::half(0.2f)); /**< Relative tolerance value for FP16 types */
*c217d954SCole Faustconst AbsoluteTolerance<float>            abs_tolerance_f16(0.2f);                   /**< Absolute tolerance for FP16 types */
*c217d954SCole Faustconstexpr float                           tolerance_num = 0.07f;                     /**< Tolerance number for the FP16 implementation */
*c217d954SCole Faust#endif                                                                               /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
*c217d954SCole Faustconstexpr AbsoluteTolerance<float> tolerance_qasymm8(0.0);                           /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
*c217d954SCole Faust
*c217d954SCole Faust/** CNN data types */
*c217d954SCole Faustconst auto CNNDataTypes = framework::dataset::make("DataType",
*c217d954SCole Faust{
*c217d954SCole Faust#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
*c217d954SCole Faust    DataType::F16,
*c217d954SCole Faust#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
*c217d954SCole Faust    DataType::F32,
*c217d954SCole Faust    DataType::QASYMM8,
*c217d954SCole Faust});
*c217d954SCole Faustconst auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
*c217d954SCole Faust{
*c217d954SCole Faust    ActivationLayerInfo(),
*c217d954SCole Faust    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
*c217d954SCole Faust    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f)
*c217d954SCole Faust});
*c217d954SCole Faust
*c217d954SCole Faustconst auto QuantizationData = framework::dataset::make("QuantizationInfo",
*c217d954SCole Faust{
*c217d954SCole Faust    QuantizationInfo(0.5f, 10),
*c217d954SCole Faust    QuantizationInfo(0.3f, 3),
*c217d954SCole Faust    QuantizationInfo(1.f, 10),
*c217d954SCole Faust    QuantizationInfo(1.1f, 10),
*c217d954SCole Faust});
*c217d954SCole Faust} // namespace
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE(NEON)
*c217d954SCole FaustTEST_SUITE(ConvolutionLayer)
*c217d954SCole Faust
*c217d954SCole Faust// *INDENT-OFF*
*c217d954SCole Faust// clang-format off
*c217d954SCole FaustDATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
*c217d954SCole Faust                                          framework::dataset::make("InputInfo", { TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F32),
*c217d954SCole Faust                                                                                  TensorInfo(TensorShape(23U, 27U, 32U, 4U), 1, DataType::F32),
*c217d954SCole Faust                                                                                  TensorInfo(TensorShape(3U, 3U, 2U, 1U), 1, DataType::F32),
*c217d954SCole Faust                                                                                  TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32)
*c217d954SCole Faust                                          }),
*c217d954SCole Faust                                          framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F32),
*c217d954SCole Faust                                                                                    TensorInfo(TensorShape(5U, 5U, 32U, 21U), 1, DataType::F32),
*c217d954SCole Faust                                                                                    TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32),
*c217d954SCole Faust                                                                                    TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16)
*c217d954SCole Faust                                          })),
*c217d954SCole Faust                                          framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F32),
*c217d954SCole Faust                                                                                   TensorInfo(TensorShape(19U, 23U, 21U, 4U), 1, DataType::F32),
*c217d954SCole Faust                                                                                   TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32),
*c217d954SCole Faust                                                                                   TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32)
*c217d954SCole Faust                                          })),
*c217d954SCole Faust                                          framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
*c217d954SCole Faust                                                                                 PadStrideInfo(1, 1, 0, 0),
*c217d954SCole Faust                                                                                 PadStrideInfo(2, 1, 0, 0),
*c217d954SCole Faust                                                                                 PadStrideInfo(3, 2, 1, 0)
*c217d954SCole Faust                                          })),
*c217d954SCole Faust                                          framework::dataset::make("FastMath", { true,
*c217d954SCole Faust                                                                                 true,
*c217d954SCole Faust                                                                                 false,
*c217d954SCole Faust                                                                                 false
*c217d954SCole Faust                                          })),
*c217d954SCole Faust                                                                           framework::dataset::make("Expected", { ConvolutionMethod::WINOGRAD, ConvolutionMethod::WINOGRAD, ConvolutionMethod::GEMM, ConvolutionMethod::GEMM })),
*c217d954SCole Faust               input_info, weights_info, output_info, conv_info, fast_math, expected)
*c217d954SCole Faust{
*c217d954SCole Faust    ConvolutionMethod is_valid = NEConvolutionLayer::get_convolution_method(&input_info.clone()->set_is_resizable(true),
*c217d954SCole Faust                                                                            &weights_info.clone()->set_is_resizable(true),
*c217d954SCole Faust                                                                            &output_info.clone()->set_is_resizable(true), conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), fast_math);
*c217d954SCole Faust    ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
*c217d954SCole Faust}
*c217d954SCole Faust// clang-format on
*c217d954SCole Faust// *INDENT-ON*
*c217d954SCole FaustTEST_SUITE_END() // ConvolutionLayer
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE(WinogradLayer)
*c217d954SCole Fausttemplate <typename T>
*c217d954SCole Faustusing NEWinogradConvolutionLayerFixture = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, T>;
*c217d954SCole Fausttemplate <typename T>
*c217d954SCole Faustusing NEWinogradConvolutionLayerMixedDataLayoutFixture = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, T, T, true, true>;
*c217d954SCole Faust
*c217d954SCole Fausttemplate <typename T>
*c217d954SCole Faustusing NEWinogradConvolutionLayerNoBiasFixture = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, T, T, false>;
*c217d954SCole Faust
*c217d954SCole Faust/** Test case for memory injection in @ref cpu::CpuWinogradConv2d.
*c217d954SCole Faust *
*c217d954SCole Faust * Configure the operator once and inject memory at run-time in multiple executions.
*c217d954SCole Faust *
*c217d954SCole Faust * Checks performed in order:
*c217d954SCole Faust * - Both runs compute the same output
*c217d954SCole Faust */
*c217d954SCole FaustTEST_CASE(MemoryInjection, framework::DatasetMode::ALL)
*c217d954SCole Faust{
*c217d954SCole Faust    auto                winograd = std::make_unique<cpu::CpuWinogradConv2d>();
*c217d954SCole Faust    const auto          src_info = TensorInfo(TensorShape(8U, 8U, 32U), 1, DataType::F32);
*c217d954SCole Faust    const auto          w_info   = TensorInfo(TensorShape(1U), 1, DataType::F32);
*c217d954SCole Faust    const auto          b_info   = TensorInfo(TensorShape(1U, 3U, 32U, 1U), 1, DataType::F32);
*c217d954SCole Faust    auto                dst_info = TensorInfo(TensorShape(8U, 6U, 1U), 1, DataType::F32);
*c217d954SCole Faust    const PadStrideInfo pad_info{};
*c217d954SCole Faust
*c217d954SCole Faust    winograd->configure(&src_info, &b_info, &w_info, &dst_info, pad_info);
*c217d954SCole Faust
*c217d954SCole Faust    // telhs are newly created every call of this lambda function
*c217d954SCole Faust    auto a = create_tensor<Tensor>(src_info);
*c217d954SCole Faust    auto b = create_tensor<Tensor>(b_info);
*c217d954SCole Faust    auto c = create_tensor<Tensor>(w_info);
*c217d954SCole Faust    a.allocator()->allocate();
*c217d954SCole Faust    b.allocator()->allocate();
*c217d954SCole Faust    c.allocator()->allocate();
*c217d954SCole Faust
*c217d954SCole Faust    ITensorPack run_pack{ { TensorType::ACL_SRC_0, &a }, { TensorType::ACL_SRC_1, &b }, { TensorType::ACL_SRC_2, &c } };
*c217d954SCole Faust    ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &b }, { TensorType::ACL_SRC_2, &c } };
*c217d954SCole Faust
*c217d954SCole Faust    auto mg       = MemoryGroup{};
*c217d954SCole Faust    auto ws       = manage_workspace<Tensor>(winograd->workspace(), mg, run_pack, prep_pack);
*c217d954SCole Faust    auto run_conv = [&]() -> Tensor
*c217d954SCole Faust    {
*c217d954SCole Faust        auto dst = create_tensor<Tensor>(dst_info);
*c217d954SCole Faust        dst.allocator()->allocate();
*c217d954SCole Faust
*c217d954SCole Faust        run_pack.add_tensor(TensorType::ACL_DST, &dst);
*c217d954SCole Faust        library->fill_tensor_value(Accessor(a), 1.f);
*c217d954SCole Faust        library->fill_tensor_value(Accessor(b), 2.f);
*c217d954SCole Faust        library->fill_tensor_value(Accessor(c), 3.f);
*c217d954SCole Faust
*c217d954SCole Faust        // This operator is configured once and captured by this lambda.
*c217d954SCole Faust        winograd->prepare(prep_pack);
*c217d954SCole Faust        winograd->run(run_pack);
*c217d954SCole Faust        return dst;
*c217d954SCole Faust    };
*c217d954SCole Faust
*c217d954SCole Faust    auto result_0 = run_conv();
*c217d954SCole Faust    auto result_1 = run_conv();
*c217d954SCole Faust
*c217d954SCole Faust    for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
*c217d954SCole Faust    {
*c217d954SCole Faust        ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
*c217d954SCole Faust    }
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole Faust/** Test case for memory injection in @ref NEWinogradConvolutionLayer.
*c217d954SCole Faust *
*c217d954SCole Faust * Make sure @ref NEWinogradConvolutionLayer still works through injecting the memory at configure time using the old API.
*c217d954SCole Faust *
*c217d954SCole Faust * Checks performed in order:
*c217d954SCole Faust * - Both runs compute the same output
*c217d954SCole Faust */
*c217d954SCole FaustTEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL)
*c217d954SCole Faust{
*c217d954SCole Faust    auto                gemm     = std::make_unique<NEWinogradConvolutionLayer>();
*c217d954SCole Faust    const auto          src_info = TensorInfo(TensorShape(8U, 8U, 32U), 1, DataType::F32);
*c217d954SCole Faust    const auto          w_info   = TensorInfo(TensorShape(1U), 1, DataType::F32);
*c217d954SCole Faust    const auto          b_info   = TensorInfo(TensorShape(1U, 3U, 32U, 1U), 1, DataType::F32);
*c217d954SCole Faust    auto                dst_info = TensorInfo(TensorShape(8U, 6U, 1U), 1, DataType::F32);
*c217d954SCole Faust    const PadStrideInfo pad_info{};
*c217d954SCole Faust
*c217d954SCole Faust    auto run_conv = [&]()
*c217d954SCole Faust    {
*c217d954SCole Faust        auto src = create_tensor<Tensor>(src_info);
*c217d954SCole Faust        auto w   = create_tensor<Tensor>(w_info);
*c217d954SCole Faust        auto b   = create_tensor<Tensor>(b_info);
*c217d954SCole Faust        auto dst = create_tensor<Tensor>(dst_info);
*c217d954SCole Faust
*c217d954SCole Faust        gemm->configure(&src, &b, &w, &dst, pad_info);
*c217d954SCole Faust
*c217d954SCole Faust        src.allocator()->allocate();
*c217d954SCole Faust        b.allocator()->allocate();
*c217d954SCole Faust        w.allocator()->allocate();
*c217d954SCole Faust        dst.allocator()->allocate();
*c217d954SCole Faust
*c217d954SCole Faust        library->fill_tensor_value(Accessor(src), 1.f);
*c217d954SCole Faust        library->fill_tensor_value(Accessor(b), 2.f);
*c217d954SCole Faust        library->fill_tensor_value(Accessor(w), 3.f);
*c217d954SCole Faust        gemm->run();
*c217d954SCole Faust        return dst;
*c217d954SCole Faust    };
*c217d954SCole Faust
*c217d954SCole Faust    auto result_0 = run_conv();
*c217d954SCole Faust    auto result_1 = run_conv();
*c217d954SCole Faust
*c217d954SCole Faust    for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
*c217d954SCole Faust    {
*c217d954SCole Faust        ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
*c217d954SCole Faust    }
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE(FP32)
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE(Conv1x3)
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
*c217d954SCole Faust                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(),
*c217d954SCole Faust                                               framework::dataset::make("DataType", { DataType::F32 })),
*c217d954SCole Faust                                       ActivationFunctionsDataset),
*c217d954SCole Faust                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output
*c217d954SCole Faust    validate(Accessor(_target), _reference, abs_tolerance_f32);
*c217d954SCole Faust}
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEWinogradConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT,
*c217d954SCole Faust                       combine(combine(combine(combine(combine(combine(combine(combine(
*c217d954SCole Faust                                                                                   framework::dataset::make("Input", TensorShape(8U, 8U, 32U)),
*c217d954SCole Faust                                                                                   framework::dataset::make("Weight", TensorShape(1U, 3U, 32U, 1U))),
*c217d954SCole Faust                                                                               framework::dataset::make("Bias", TensorShape(1U))),
*c217d954SCole Faust                                                                       framework::dataset::make("Output", TensorShape(8U, 6U, 1U))),
*c217d954SCole Faust                                                               framework::dataset::make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0))),
*c217d954SCole Faust                                                       framework::dataset::make("Dilation", Size2D(1U, 1U))),
*c217d954SCole Faust                                               framework::dataset::make("DataType", { DataType::F32 })),
*c217d954SCole Faust                                       ActivationFunctionsDataset),
*c217d954SCole Faust                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output
*c217d954SCole Faust    validate(Accessor(_target), _reference, abs_tolerance_f32);
*c217d954SCole Faust}
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
*c217d954SCole Faust                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x3Dataset(),
*c217d954SCole Faust                                               framework::dataset::make("DataType", { DataType::F32 })),
*c217d954SCole Faust                                       ActivationFunctionsDataset),
*c217d954SCole Faust                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output
*c217d954SCole Faust    validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE_END() // Conv1x3
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE(Conv3x1)
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
*c217d954SCole Faust                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(),
*c217d954SCole Faust                                               framework::dataset::make("DataType", { DataType::F32 })),
*c217d954SCole Faust                                       ActivationFunctionsDataset),
*c217d954SCole Faust                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output
*c217d954SCole Faust    validate(Accessor(_target), _reference, abs_tolerance_f32);
*c217d954SCole Faust}
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
*c217d954SCole Faust                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x1Dataset(),
*c217d954SCole Faust                                               framework::dataset::make("DataType", { DataType::F32 })),
*c217d954SCole Faust                                       ActivationFunctionsDataset),
*c217d954SCole Faust                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output
*c217d954SCole Faust    validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE_END() // Conv3x1
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE(Conv1x5)
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
*c217d954SCole Faust                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(),
*c217d954SCole Faust                                               framework::dataset::make("DataType", { DataType::F32 })),
*c217d954SCole Faust                                       ActivationFunctionsDataset),
*c217d954SCole Faust                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output
*c217d954SCole Faust    validate(Accessor(_target), _reference, abs_tolerance_f32);
*c217d954SCole Faust}
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
*c217d954SCole Faust                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x5Dataset(),
*c217d954SCole Faust                                               framework::dataset::make("DataType", { DataType::F32 })),
*c217d954SCole Faust                                       ActivationFunctionsDataset),
*c217d954SCole Faust                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output
*c217d954SCole Faust    validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE_END() // Conv1x5
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE(Conv5x1)
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
*c217d954SCole Faust                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(),
*c217d954SCole Faust                                               framework::dataset::make("DataType", { DataType::F32 })),
*c217d954SCole Faust                                       ActivationFunctionsDataset),
*c217d954SCole Faust                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output
*c217d954SCole Faust    validate(Accessor(_target), _reference, abs_tolerance_f32);
*c217d954SCole Faust}
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
*c217d954SCole Faust                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x1Dataset(),
*c217d954SCole Faust                                               framework::dataset::make("DataType", { DataType::F32 })),
*c217d954SCole Faust                                       ActivationFunctionsDataset),
*c217d954SCole Faust                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output
*c217d954SCole Faust    validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE_END() // Conv5x1
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE(Conv7x1)
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
*c217d954SCole Faust                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer7x1Dataset(),
*c217d954SCole Faust                                               framework::dataset::make("DataType", { DataType::F32 })),
*c217d954SCole Faust                                       ActivationFunctionsDataset),
*c217d954SCole Faust                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output
*c217d954SCole Faust    validate(Accessor(_target), _reference, abs_tolerance_f32);
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
*c217d954SCole Faust                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer7x1Dataset(),
*c217d954SCole Faust                                               framework::dataset::make("DataType", { DataType::F32 })),
*c217d954SCole Faust                                       ActivationFunctionsDataset),
*c217d954SCole Faust                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output
*c217d954SCole Faust    validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
*c217d954SCole Faust}
*c217d954SCole FaustTEST_SUITE_END() // Conv7x1
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE(Conv1x7)
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
*c217d954SCole Faust                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x7Dataset(),
*c217d954SCole Faust                                               framework::dataset::make("DataType", { DataType::F32 })),
*c217d954SCole Faust                                       ActivationFunctionsDataset),
*c217d954SCole Faust                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output
*c217d954SCole Faust    validate(Accessor(_target), _reference, abs_tolerance_f32);
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
*c217d954SCole Faust                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer7x1Dataset(),
*c217d954SCole Faust                                               framework::dataset::make("DataType", { DataType::F32 })),
*c217d954SCole Faust                                       ActivationFunctionsDataset),
*c217d954SCole Faust                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output
*c217d954SCole Faust    validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
*c217d954SCole Faust}
*c217d954SCole FaustTEST_SUITE_END() // Conv1x7
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE(Conv3x3)
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
*c217d954SCole Faust                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
*c217d954SCole Faust                                               framework::dataset::make("DataType", { DataType::F32 })),
*c217d954SCole Faust                                       ActivationFunctionsDataset),
*c217d954SCole Faust                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
*c217d954SCole Faust
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output
*c217d954SCole Faust    validate(Accessor(_target), _reference, abs_tolerance_f32);
*c217d954SCole Faust}
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
*c217d954SCole Faust                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
*c217d954SCole Faust                                               framework::dataset::make("DataType", { DataType::F32 })),
*c217d954SCole Faust                                       ActivationFunctionsDataset),
*c217d954SCole Faust                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
*c217d954SCole Faust
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output (NIGHTLY run over the large Winograd 3x3 dataset, F32, both NCHW and NHWC layouts)
*c217d954SCole Faust    // Due to floating point arithmetic, the Winograd results will not be exactly the same as direct convolution, especially for big shapes; hence rel_tolerance_winograd_3x3_f32 is passed here together with abs_tolerance_f32 instead of abs_tolerance_f32 alone
*c217d954SCole Faust    validate(Accessor(_target), _reference, rel_tolerance_winograd_3x3_f32, 0.f, float(abs_tolerance_f32));
*c217d954SCole Faust}
*c217d954SCole FaustTEST_SUITE_END() // Conv3x3
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE(Conv5x5)
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
*c217d954SCole Faust                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(),
*c217d954SCole Faust                                               framework::dataset::make("DataType", { DataType::F32 })),
*c217d954SCole Faust                                       ActivationFunctionsDataset),
*c217d954SCole Faust                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
*c217d954SCole Faust
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output: F32 run over the small Winograd 5x5 dataset, combined with each entry of ActivationFunctionsDataset and both NCHW/NHWC layouts; target is compared with the reference using abs_tolerance_f32
*c217d954SCole Faust    validate(Accessor(_target), _reference, abs_tolerance_f32);
*c217d954SCole Faust}
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
*c217d954SCole Faust                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x5Dataset(),
*c217d954SCole Faust                                               framework::dataset::make("DataType", { DataType::F32 })),
*c217d954SCole Faust                                       ActivationFunctionsDataset),
*c217d954SCole Faust                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
*c217d954SCole Faust
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output: NIGHTLY F32 run over the large Winograd 5x5 dataset; unlike the large 3x3 case in this file, only abs_tolerance_f32 is passed to validate()
*c217d954SCole Faust    validate(Accessor(_target), _reference, abs_tolerance_f32);
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE_END() // Conv5x5
*c217d954SCole Faust
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunSmallNoBias, NEWinogradConvolutionLayerNoBiasFixture<float>, framework::DatasetMode::PRECOMMIT,
*c217d954SCole Faust                       combine(combine(combine(framework::dataset::concat(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
*c217d954SCole Faust                                                                          datasets::SmallWinogradConvolutionLayer5x5Dataset()),
*c217d954SCole Faust                                               framework::dataset::make("DataType", { DataType::F32 })),
*c217d954SCole Faust                                       ActivationFunctionsDataset),
*c217d954SCole Faust
*c217d954SCole Faust                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output: the no-bias fixture is run over the concatenation of the small 3x3 and small 5x5 Winograd datasets (F32, NCHW and NHWC) and checked with abs_tolerance_f32
*c217d954SCole Faust    validate(Accessor(_target), _reference, abs_tolerance_f32);
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE_END() // FP32
*c217d954SCole Faust
*c217d954SCole Faust#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
*c217d954SCole FaustTEST_SUITE(FP16)
*c217d954SCole Faustusing CLWinogradConvolutionLayerFastMathFixture16 = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, half, float>; // Fast-math Winograd fixture for half data. NOTE(review): the alias name has a "CL" prefix although it wraps NEWinogradConvolutionLayer.
*c217d954SCole Faust
*c217d954SCole FaustDATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
*c217d954SCole Faust                                          framework::dataset::make("InputInfo", { TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F16),
*c217d954SCole Faust                                                                                  TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F16)
*c217d954SCole Faust                                          }),
*c217d954SCole Faust                                          framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F16),
*c217d954SCole Faust                                                                                    TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F16)
*c217d954SCole Faust                                          })),
*c217d954SCole Faust                                          framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F32),
*c217d954SCole Faust                                                                                   TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F16)
*c217d954SCole Faust                                          })),
*c217d954SCole Faust                                          framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
*c217d954SCole Faust                                                                                 PadStrideInfo(1, 1, 0, 0)
*c217d954SCole Faust                                          })),
*c217d954SCole Faust                                          framework::dataset::make("FastMath", { false, // case fp16 and fast_math False then disable Winograd
*c217d954SCole Faust                                                                                 true   // case fp16 and fast_math True then enable Winograd
*c217d954SCole Faust                                          })),
*c217d954SCole Faust                                                                           framework::dataset::make("Expected", { ConvolutionMethod::GEMM, ConvolutionMethod::WINOGRAD })),
*c217d954SCole Faust               input_info, weights_info, output_info, conv_info, fast_math, expected)
*c217d954SCole Faust{ // Checks which ConvolutionMethod NEConvolutionLayer::get_convolution_method() reports for two zipped F16 3x3 configurations, one with fast_math off and one with it on.
*c217d954SCole Faust    ConvolutionMethod is_valid = NEConvolutionLayer::get_convolution_method(&input_info.clone()->set_is_resizable(true),
*c217d954SCole Faust                                                                            &weights_info.clone()->set_is_resizable(true),
*c217d954SCole Faust                                                                            &output_info.clone()->set_is_resizable(true), conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), fast_math);
*c217d954SCole Faust    ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); // despite its name, is_valid holds the returned ConvolutionMethod, which must equal the zipped "Expected" entry
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE(Conv3x3)
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
*c217d954SCole Faust                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
*c217d954SCole Faust                                               framework::dataset::make("DataType", { DataType::F16 })),
*c217d954SCole Faust                                       ActivationFunctionsDataset),
*c217d954SCole Faust                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
*c217d954SCole Faust
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output: F16 fast-math Winograd fixture over the small 3x3 dataset (NCHW and NHWC), checked with tolerance_convolution_layer_f16 and tolerance_num_f16
*c217d954SCole Faust    validate(Accessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16);
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
*c217d954SCole Faust                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
*c217d954SCole Faust                                               framework::dataset::make("DataType", { DataType::F16 })),
*c217d954SCole Faust                                       ActivationFunctionsDataset),
*c217d954SCole Faust                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
*c217d954SCole Faust
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output: NIGHTLY F16 fast-math Winograd fixture over the large 3x3 dataset (NCHW and NHWC), checked with the same tolerances as the small run
*c217d954SCole Faust    validate(Accessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16);
*c217d954SCole Faust}
*c217d954SCole FaustTEST_SUITE_END() // Conv3x3
*c217d954SCole FaustTEST_SUITE_END() // FP16
*c217d954SCole Faust#endif           /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
*c217d954SCole FaustTEST_SUITE_END() // WinogradLayer
*c217d954SCole Faust
*c217d954SCole Faust#ifdef ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS
*c217d954SCole FaustTEST_SUITE(FIXED_FORMAT_KERNELS)
*c217d954SCole FaustTEST_SUITE(VariableWeightUtils)
*c217d954SCole Faust
*c217d954SCole Faust// UC2_1_* tests: the user requests a specific fixed format, but there is no kernel that supports it.
*c217d954SCole Faust
*c217d954SCole Fausttemplate <typename ConvolutionClass> // HasOptImplFixture for ConvolutionClass with fast math disabled
*c217d954SCole Faustusing HasOptImplFixtureNoFastMath = HasOptImplFixture<ConvolutionClass, /*enable_fast_math*/ false>;
*c217d954SCole Faust
*c217d954SCole Fausttemplate <typename ConvolutionClass> // HasOptImplFixture for ConvolutionClass with fast math enabled
*c217d954SCole Faustusing HasOptImplFixtureFastMath = HasOptImplFixture<ConvolutionClass, /*enable_fast_math*/ true>;
*c217d954SCole Faust
*c217d954SCole Faust// UC2_1
*c217d954SCole Faust
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(UC2_1_CpuGemmConv2d, HasOptImplFixtureNoFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
*c217d954SCole Faust                       combine(framework::dataset::make("DataType", { DataType::F32 }),
*c217d954SCole Faust                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo2 })))
*c217d954SCole Faust{
*c217d954SCole Faust    ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS); // querying cpu::CpuGemmConv2d (no fast math) for F32 with OHWIo2 must not find a kernel
*c217d954SCole Faust}
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(UC2_1_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
*c217d954SCole Faust                       combine(framework::dataset::make("DataType", { DataType::F32 }),
*c217d954SCole Faust                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo2 })))
*c217d954SCole Faust{
*c217d954SCole Faust    ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS); // querying NEGEMMConvolutionLayer (no fast math) for F32 with OHWIo2 must not find a kernel
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(UC2_1_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
*c217d954SCole Faust                       combine(framework::dataset::make("DataType", { DataType::F32 }),
*c217d954SCole Faust                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo2 })))
*c217d954SCole Faust{
*c217d954SCole Faust    ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS); // querying cpu::CpuGemmConv2d (fast math) for F32 with OHWIo2 must not find a kernel
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(UC2_1_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
*c217d954SCole Faust                       combine(framework::dataset::make("DataType", { DataType::F32 }),
*c217d954SCole Faust                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo2 })))
*c217d954SCole Faust{
*c217d954SCole Faust    ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS); // querying NEGEMMConvolutionLayer (fast math) for F32 with OHWIo2 must not find a kernel
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole Faust// UC2_2_* tests: the user requests a specific fixed format, and a
*c217d954SCole Faust// kernel that supports that fixed format is found.
*c217d954SCole Faust
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(UC2_2_CpuGemmConv2d, HasOptImplFixtureNoFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
*c217d954SCole Faust                       combine(framework::dataset::make("DataType", { DataType::F32 }),
*c217d954SCole Faust                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo4 })))
*c217d954SCole Faust{
*c217d954SCole Faust    ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); // an F32 query for OHWIo4 must find a kernel
*c217d954SCole Faust    ARM_COMPUTE_EXPECT(_computed_weight_format == arm_compute::WeightFormat::OHWIo4, framework::LogLevel::ERRORS); // and the reported format must be the one that was queried
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(UC2_2_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
*c217d954SCole Faust                       combine(framework::dataset::make("DataType", { DataType::F32 }),
*c217d954SCole Faust                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo4 })))
*c217d954SCole Faust{
*c217d954SCole Faust    ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); // an F32 query for OHWIo4 must find a kernel
*c217d954SCole Faust    ARM_COMPUTE_EXPECT(_computed_weight_format == arm_compute::WeightFormat::OHWIo4, framework::LogLevel::ERRORS); // and the reported format must be the one that was queried
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole Faust#if defined(ARM_COMPUTE_ENABLE_BF16)
*c217d954SCole Faust
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(UC2_2_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
*c217d954SCole Faust                       combine(framework::dataset::make("DataType", { DataType::F32 }),
*c217d954SCole Faust                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo8i4_bf16 })))
*c217d954SCole Faust{
*c217d954SCole Faust    ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); // with fast math on, an F32 query for OHWIo8i4_bf16 must find a kernel
*c217d954SCole Faust    ARM_COMPUTE_EXPECT_EQUAL(_computed_weight_format, arm_compute::WeightFormat::OHWIo8i4_bf16, framework::LogLevel::ERRORS); // and the reported format must be the one that was queried (the NEGEMMConvolutionLayer twin uses ARM_COMPUTE_EXPECT with == for the same check)
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(UC2_2_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
*c217d954SCole Faust                       combine(framework::dataset::make("DataType", { DataType::F32 }),
*c217d954SCole Faust                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo8i4_bf16 })))
*c217d954SCole Faust{
*c217d954SCole Faust    ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); // with fast math on, an F32 query for OHWIo8i4_bf16 must find a kernel
*c217d954SCole Faust    ARM_COMPUTE_EXPECT(_computed_weight_format == arm_compute::WeightFormat::OHWIo8i4_bf16, framework::LogLevel::ERRORS); // and the reported format must be the one that was queried
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole Faust#endif // ARM_COMPUTE_ENABLE_BF16
*c217d954SCole Faust
*c217d954SCole Faust// UC3_1_* tests: the user queries for ANY fixed format, but there is
*c217d954SCole Faust// no kernel that supports the use case specified by the user (for
*c217d954SCole Faust// example, there is no fixed format kernel for the datatype of the
*c217d954SCole Faust// problem).
*c217d954SCole Faust
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(UC3_1_CpuGemmConv2d, HasOptImplFixtureNoFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
*c217d954SCole Faust                       combine(framework::dataset::make("DataType", { DataType::S32 }),
*c217d954SCole Faust                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
*c217d954SCole Faust{
*c217d954SCole Faust    ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS); // an ANY-format query with S32 data on cpu::CpuGemmConv2d (no fast math) must not find a kernel
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(UC3_1_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
*c217d954SCole Faust                       combine(framework::dataset::make("DataType", { DataType::S32 }),
*c217d954SCole Faust                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
*c217d954SCole Faust{
*c217d954SCole Faust    ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS); // an ANY-format query with S32 data on NEGEMMConvolutionLayer (no fast math) must not find a kernel
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(UC3_1_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
*c217d954SCole Faust                       combine(framework::dataset::make("DataType", { DataType::S32 }),
*c217d954SCole Faust                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
*c217d954SCole Faust{
*c217d954SCole Faust    ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS); // an ANY-format query with S32 data on cpu::CpuGemmConv2d (fast math) must not find a kernel
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(UC3_1_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
*c217d954SCole Faust                       combine(framework::dataset::make("DataType", { DataType::S32 }),
*c217d954SCole Faust                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
*c217d954SCole Faust{
*c217d954SCole Faust    ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS); // an ANY-format query with S32 data on NEGEMMConvolutionLayer (fast math) must not find a kernel
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole Faust// UC3_2_* tests: the user queries for ANY fixed format. The search
*c217d954SCole Faust// succeeds and the fixed format found is returned for
*c217d954SCole Faust// consumption by the user. Note that we only test that
*c217d954SCole Faust// _computed_weight_format is neither of the values that are
*c217d954SCole Faust// not fixed formats (ANY and UNSPECIFIED). This is because the weight
*c217d954SCole Faust// format that the runtime produces depends on the size of the vector
*c217d954SCole Faust// units of the hardware where the test is executed. For example, a
*c217d954SCole Faust// format like OHWIo4 for FP32 data returned for 128-bit NEON hardware
*c217d954SCole Faust// is replaced by OHWIo8 when running on 256-bit SVE.
*c217d954SCole Faust
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(UC3_2_CpuGemmConv2d, HasOptImplFixtureNoFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
*c217d954SCole Faust                       combine(framework::dataset::make("DataType", { DataType::F32 }),
*c217d954SCole Faust                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
*c217d954SCole Faust{
*c217d954SCole Faust    ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); // an ANY-format query with F32 data must find a kernel
*c217d954SCole Faust    ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS); // the reported format must be a concrete one ...
*c217d954SCole Faust    ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS); // ... i.e. neither ANY nor UNSPECIFIED
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(UC3_2_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
*c217d954SCole Faust                       combine(framework::dataset::make("DataType", { DataType::F32 }),
*c217d954SCole Faust                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
*c217d954SCole Faust{
*c217d954SCole Faust    // An ANY-format query with F32 data must find a kernel; this is asserted explicitly, as in the other UC3_2_* cases, so the format checks below cannot pass on a failed query.
*c217d954SCole Faust    ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS);
*c217d954SCole Faust    // The reported format must be a concrete one, i.e. neither ANY nor UNSPECIFIED.
*c217d954SCole Faust    ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS);
*c217d954SCole Faust    ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS);
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole Faust#if defined(ARM_COMPUTE_ENABLE_BF16)
*c217d954SCole Faust
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(UC3_2_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
*c217d954SCole Faust                       combine(framework::dataset::make("DataType", { DataType::F32 }),
*c217d954SCole Faust                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
*c217d954SCole Faust{
*c217d954SCole Faust    ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); // with fast math on, an ANY-format query with F32 data must find a kernel
*c217d954SCole Faust    ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS); // the reported format must be a concrete one ...
*c217d954SCole Faust    ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS); // ... i.e. neither ANY nor UNSPECIFIED
*c217d954SCole Faust    ARM_COMPUTE_EXPECT(arm_compute::is_fixed_format_fast_math(_computed_weight_format), framework::LogLevel::ERRORS); // and is_fixed_format_fast_math() must accept it
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(UC3_2_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
*c217d954SCole Faust                       combine(framework::dataset::make("DataType", { DataType::F32 }),
*c217d954SCole Faust                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
*c217d954SCole Faust{
*c217d954SCole Faust    ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); // with fast math on, an ANY-format query with F32 data must find a kernel
*c217d954SCole Faust    ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS); // the reported format must be a concrete one ...
*c217d954SCole Faust    ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS); // ... i.e. neither ANY nor UNSPECIFIED
*c217d954SCole Faust    ARM_COMPUTE_EXPECT(arm_compute::is_fixed_format_fast_math(_computed_weight_format), framework::LogLevel::ERRORS); // and is_fixed_format_fast_math() must accept it
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole Faust#endif // ARM_COMPUTE_ENABLE_BF16
*c217d954SCole Faust
*c217d954SCole Faustnamespace
*c217d954SCole Faust{
*c217d954SCole Faustusing TestCaseType          = std::tuple<TensorShape, TensorShape, arm_compute::WeightFormat>; // (input weights shape, expected prepared shape, weight format), as unpacked by PrepareWeightShape
*c217d954SCole Faustauto prepare_weights_shapes = framework::dataset::make("TensorShape",
*c217d954SCole Faust{
*c217d954SCole Faust    // OHWIo<interleave_by>i<block_by>
*c217d954SCole Faust    //
*c217d954SCole Faust    // OHWI --> O'HWI', where:
*c217d954SCole Faust    //
*c217d954SCole Faust    //   O'= smallest multiple of <interleave_by> such that O<=O'
*c217d954SCole Faust    //   I'= smallest multiple of <block_by> such that I<=I'
*c217d954SCole Faust    //
*c217d954SCole Faust
*c217d954SCole Faust    // Change N for OHWIo4
*c217d954SCole Faust    TestCaseType({ { 1U, 1U, 1U, 1U }, { 1U, 1U, 1U, 4U }, arm_compute::WeightFormat::OHWIo4 }),
*c217d954SCole Faust    TestCaseType({ { 1U, 1U, 1U, 2U }, { 1U, 1U, 1U, 4U }, arm_compute::WeightFormat::OHWIo4 }),
*c217d954SCole Faust    TestCaseType({ { 1U, 1U, 1U, 3U }, { 1U, 1U, 1U, 4U }, arm_compute::WeightFormat::OHWIo4 }),
*c217d954SCole Faust    TestCaseType({ { 1U, 1U, 1U, 4U }, { 1U, 1U, 1U, 4U }, arm_compute::WeightFormat::OHWIo4 }),
*c217d954SCole Faust    TestCaseType({ { 1U, 1U, 1U, 5U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo4 }),
*c217d954SCole Faust    TestCaseType({ { 1U, 1U, 1U, 6U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo4 }),
*c217d954SCole Faust    TestCaseType({ { 1U, 1U, 1U, 7U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo4 }),
*c217d954SCole Faust    TestCaseType({ { 1U, 1U, 1U, 8U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo4 }),
*c217d954SCole Faust    TestCaseType({ { 1U, 1U, 1U, 9U }, { 1U, 1U, 1U, 12U }, arm_compute::WeightFormat::OHWIo4 }),
*c217d954SCole Faust    // Change N for OHWIo8
*c217d954SCole Faust    TestCaseType({ { 1U, 1U, 1U, 1U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
*c217d954SCole Faust    TestCaseType({ { 1U, 1U, 1U, 2U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
*c217d954SCole Faust    TestCaseType({ { 1U, 1U, 1U, 3U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
*c217d954SCole Faust    TestCaseType({ { 1U, 1U, 1U, 4U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
*c217d954SCole Faust    TestCaseType({ { 1U, 1U, 1U, 5U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
*c217d954SCole Faust    TestCaseType({ { 1U, 1U, 1U, 6U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
*c217d954SCole Faust    TestCaseType({ { 1U, 1U, 1U, 7U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
*c217d954SCole Faust    TestCaseType({ { 1U, 1U, 1U, 8U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
*c217d954SCole Faust    TestCaseType({ { 1U, 1U, 1U, 9U }, { 1U, 1U, 1U, 16U }, arm_compute::WeightFormat::OHWIo8 }),
*c217d954SCole Faust    // Change N for OHWIo4 when H, W and C are not 1
*c217d954SCole Faust    TestCaseType({ { 3U, 4U, 2U, 1U }, { 3, 4, 2, 4 }, arm_compute::WeightFormat::OHWIo4 }),
*c217d954SCole Faust    TestCaseType({ { 3U, 4U, 2U, 2U }, { 3, 4, 2, 4 }, arm_compute::WeightFormat::OHWIo4 }),
*c217d954SCole Faust    TestCaseType({ { 3U, 4U, 2U, 3U }, { 3, 4, 2, 4 }, arm_compute::WeightFormat::OHWIo4 }),
*c217d954SCole Faust    TestCaseType({ { 3U, 4U, 2U, 4U }, { 3, 4, 2, 4 }, arm_compute::WeightFormat::OHWIo4 }),
*c217d954SCole Faust    TestCaseType({ { 3U, 4U, 2U, 5U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
*c217d954SCole Faust    TestCaseType({ { 3U, 4U, 2U, 6U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
*c217d954SCole Faust    TestCaseType({ { 3U, 4U, 2U, 7U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
*c217d954SCole Faust    TestCaseType({ { 3U, 4U, 2U, 8U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
*c217d954SCole Faust    TestCaseType({ { 3U, 4U, 2U, 9U }, { 3, 4, 2, 12 }, arm_compute::WeightFormat::OHWIo4 }),
*c217d954SCole Faust
*c217d954SCole Faust    // Fix N and move HWI around, with different data layouts and formats
*c217d954SCole Faust    TestCaseType({ { 2U, 4U, 3U, 5U }, { 2, 4, 3, 8 }, arm_compute::WeightFormat::OHWIo4 }),
*c217d954SCole Faust    TestCaseType({ { 3U, 4U, 2U, 5U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
*c217d954SCole Faust    TestCaseType({ { 2U, 4U, 3U, 9U }, { 2, 4, 3, 16 }, arm_compute::WeightFormat::OHWIo8 }),
*c217d954SCole Faust    TestCaseType({ { 3U, 4U, 2U, 9U }, { 3, 4, 2, 16 }, arm_compute::WeightFormat::OHWIo8 }),
*c217d954SCole Faust    TestCaseType({ { 1024U, 1U, 1U, 1001U }, { 1024, 1, 1, 1008 }, arm_compute::WeightFormat::OHWIo8 }),
*c217d954SCole Faust
*c217d954SCole Faust    // Adding <block_by> on I (=C): with OHWIo4i2 the first dimension is also rounded up, to a multiple of 2
*c217d954SCole Faust    TestCaseType({ { 1U, 4U, 3U, 5U }, { 2, 4, 3, 8 }, arm_compute::WeightFormat::OHWIo4i2 }),
*c217d954SCole Faust    TestCaseType({ { 2U, 4U, 3U, 5U }, { 2, 4, 3, 8 }, arm_compute::WeightFormat::OHWIo4i2 }),
*c217d954SCole Faust    TestCaseType({ { 3U, 4U, 3U, 5U }, { 4, 4, 3, 8 }, arm_compute::WeightFormat::OHWIo4i2 }),
*c217d954SCole Faust
*c217d954SCole Faust    // Additional OHWIo4 cases where the last dimension (5) is padded to 8
*c217d954SCole Faust    TestCaseType({ { 2, 2, 1, 5 }, { 2, 2, 1, 8 }, arm_compute::WeightFormat::OHWIo4 }),
*c217d954SCole Faust    TestCaseType({ { 1, 2, 2, 5 }, { 1, 2, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
*c217d954SCole Faust
*c217d954SCole Faust});
*c217d954SCole Faust} // unnamed namespace
*c217d954SCole Faust
*c217d954SCole FaustDATA_TEST_CASE(PrepareWeightShape, framework::DatasetMode::ALL,
*c217d954SCole Faust               prepare_weights_shapes, shapes)
*c217d954SCole Faust{ // For each (input shape, expected shape, weight format) tuple, checks the tensor shape that prepare_weights() produces.
*c217d954SCole Faust    const TensorShape               input_shape    = std::get<0>(shapes);
*c217d954SCole Faust    const TensorShape               expected_shape = std::get<1>(shapes);
*c217d954SCole Faust    const arm_compute::WeightFormat wf             = std::get<2>(shapes);
*c217d954SCole Faust    const DataType                  DT             = DataType::F32; // data type and layout are fixed for every case
*c217d954SCole Faust    const DataLayout                DL             = DataLayout::NHWC;
*c217d954SCole Faust    const auto                      TI             = TensorInfo(input_shape, 1 /*num_channels, deprecated*/, DT, DL);
*c217d954SCole Faust    const TensorInfo                computed_info  = ::arm_compute::test::validation::prepare_weights(TI, wf);
*c217d954SCole Faust    ARM_COMPUTE_EXPECT_EQUAL(computed_info.tensor_shape(), expected_shape, framework::LogLevel::ERRORS); // only the resulting shape is compared
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE_END() // VariableWeightUtils
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE(ExperimentalCpuAPIVariableWeightWithFixtures)
*c217d954SCole Faust
*c217d954SCole Fausttemplate <typename ScalarType> // VariableWeightsFixture over cpu::CpuGemmConv2d with fast math disabled
*c217d954SCole Faustusing VarWidth = VariableWeightsFixture<cpu::CpuGemmConv2d, Tensor, Accessor, ScalarType, /*enable_fast_math*/ false>;
*c217d954SCole Faust
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunSmallFloat, VarWidth<float>, framework::DatasetMode::ALL,
*c217d954SCole Faust                       combine(combine(datasets::SmallConvolutionLayerDataset(),
*c217d954SCole Faust                                       framework::dataset::make("DataLayout", { DataLayout::NHWC })),
*c217d954SCole Faust                               framework::dataset::make("ACL Scalar type", { DataType::F32 })))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output: F32, NHWC only, over the small convolution dataset; checked with rel_tolerance_f32 and abs_tolerance_f32
*c217d954SCole Faust    validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunSmallHalf, VarWidth<half>, framework::DatasetMode::ALL,
*c217d954SCole Faust                       combine(combine(datasets::SmallConvolutionLayerDataset(),
*c217d954SCole Faust                                       framework::dataset::make("DataLayout", { DataLayout::NHWC })),
*c217d954SCole Faust                               framework::dataset::make("ACL Scalar type", { DataType::F16 })))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output: F16, NHWC only, over the small convolution dataset; checked with rel_tolerance_f16 and abs_tolerance_f16
*c217d954SCole Faust    // NOTE(review): unlike the Winograd FP16 suite in this file, this case is not wrapped in an __ARM_FEATURE_FP16_VECTOR_ARITHMETIC guard here - confirm that is intended
*c217d954SCole Faust    validate(Accessor(_target), _reference, rel_tolerance_f16, 0.f, half(abs_tolerance_f16));
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole Faust#if defined(ARM_COMPUTE_ENABLE_BF16)
*c217d954SCole Fausttemplate <typename ScalarType> // VariableWeightsFixture over cpu::CpuGemmConv2d with fast math enabled
*c217d954SCole Faustusing VarWidthFastMath = VariableWeightsFixture<cpu::CpuGemmConv2d, Tensor, Accessor, ScalarType, /*enable_fast_math*/ true>;
*c217d954SCole Faust
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunSmallFloatFastMath, VarWidthFastMath<float>, framework::DatasetMode::ALL,
*c217d954SCole Faust                       combine(combine(datasets::SmallConvolutionLayerDataset(),
*c217d954SCole Faust                                       framework::dataset::make("DataLayout", { DataLayout::NHWC })),
*c217d954SCole Faust                               framework::dataset::make("ACL Scalar type", { DataType::F32 })))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output: fast-math variant of RunSmallFloat; the same rel_tolerance_f32 / abs_tolerance_f32 values are used as in the non-fast-math case
*c217d954SCole Faust    validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
*c217d954SCole Faust}
*c217d954SCole Faust#endif // ARM_COMPUTE_ENABLE_BF16
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE_END() // ExperimentalCpuAPIVariableWeightWithFixtures
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE(ExperimentalNEAPIVariableWeightWithFixtures)
*c217d954SCole Faust
*c217d954SCole Fausttemplate <typename ScalarType> // VariableWeightsFixtureNEInterface over NEGEMMConvolutionLayer with fast math disabled
*c217d954SCole Faustusing NEGEMMVarWidth = VariableWeightsFixtureNEInterface<NEGEMMConvolutionLayer, Tensor, Accessor, ScalarType, /*enable_fast_math*/ false>;
*c217d954SCole Faust
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(NEGEMMRunSmallFloat, NEGEMMVarWidth<float>, framework::DatasetMode::ALL,
*c217d954SCole Faust                       combine(combine(datasets::SmallConvolutionLayerDataset(),
*c217d954SCole Faust                                       framework::dataset::make("DataLayout", { DataLayout::NHWC })),
*c217d954SCole Faust                               framework::dataset::make("ACL Scalar type", { DataType::F32 })))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output: F32, NHWC only, over the small convolution dataset through the NEGEMMConvolutionLayer interface; checked with rel_tolerance_f32 and abs_tolerance_f32
*c217d954SCole Faust    validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(NEGEMMRunSmallHalf, NEGEMMVarWidth<half>, framework::DatasetMode::ALL,
*c217d954SCole Faust                       combine(combine(datasets::SmallConvolutionLayerDataset(),
*c217d954SCole Faust                                       framework::dataset::make("DataLayout", { DataLayout::NHWC })),
*c217d954SCole Faust                               framework::dataset::make("ACL Scalar type", { DataType::F16 })))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output: F16, NHWC only, over the small convolution dataset through the NEGEMMConvolutionLayer interface; checked with rel_tolerance_f16 and abs_tolerance_f16
*c217d954SCole Faust    // NOTE(review): unlike the Winograd FP16 suite in this file, this case is not wrapped in an __ARM_FEATURE_FP16_VECTOR_ARITHMETIC guard here - confirm that is intended
*c217d954SCole Faust    validate(Accessor(_target), _reference, rel_tolerance_f16, 0.f, half(abs_tolerance_f16));
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole Faust#if defined(ARM_COMPUTE_ENABLE_BF16)
*c217d954SCole Fausttemplate <typename ScalarType>
*c217d954SCole Faustusing NEGEMMVarWidthFastMath = VariableWeightsFixtureNEInterface<NEGEMMConvolutionLayer, Tensor, Accessor, ScalarType, /*enable_fast_math*/ true>;
*c217d954SCole Faust
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(NEGEMMRunSmallFloatFastMath, NEGEMMVarWidthFastMath<float>, framework::DatasetMode::ALL,
*c217d954SCole Faust                       combine(combine(datasets::SmallConvolutionLayerDataset(),
*c217d954SCole Faust                                       framework::dataset::make("DataLayout", { DataLayout::NHWC })),
*c217d954SCole Faust                               framework::dataset::make("ACL Scalar type", { DataType::F32 })))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output
*c217d954SCole Faust    validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
*c217d954SCole Faust}
*c217d954SCole Faust#endif // ARM_COMPUTE_ENABLE_BF16
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE_END() // ExperimentalNEAPIVariableWeightWithFixtures
*c217d954SCole FaustTEST_SUITE_END() // FIXED_FORMAT_KERNELS
*c217d954SCole Faust
*c217d954SCole Faust#endif // ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE(GEMMConvolutionLayer)
*c217d954SCole Fausttemplate <typename T>
*c217d954SCole Faustusing NEGEMMConvolutionLayerFixture = ConvolutionValidationFixture<Tensor, Accessor, NEConvolutionLayer, T>;
*c217d954SCole Fausttemplate <typename T>
*c217d954SCole Faustusing NEGEMMConvolutionLayerMixedDataLayoutFixture = ConvolutionValidationFixture<Tensor, Accessor, NEConvolutionLayer, T, true>;
*c217d954SCole Faust
*c217d954SCole Faust/** Test case for memory injection in @ref cpu::CpuGemmConv2d.
*c217d954SCole Faust *
*c217d954SCole Faust * Configure the operator once and inject memory at run-time in multiple executions.
*c217d954SCole Faust *
*c217d954SCole Faust * Checks performed in order:
*c217d954SCole Faust * - Both runs compute the same output
*c217d954SCole Faust */
*c217d954SCole FaustTEST_CASE(MemoryInjection, framework::DatasetMode::ALL)
*c217d954SCole Faust{
*c217d954SCole Faust    auto        conv        = std::make_unique<cpu::CpuGemmConv2d>();
*c217d954SCole Faust    const auto  src_info    = TensorInfo(TensorShape(1U, 5U, 2U), 1, DataType::F32, DataLayout::NCHW);
*c217d954SCole Faust    const auto  weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NCHW);
*c217d954SCole Faust    const auto  bias_info   = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NCHW);
*c217d954SCole Faust    auto        dst_info    = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NCHW);
*c217d954SCole Faust    const auto  conv_info   = PadStrideInfo(1, 1, 0, 0, 2, 2, DimensionRoundingType::FLOOR);
*c217d954SCole Faust    WeightsInfo weights_info(false, 3U, 3U, 1U);
*c217d954SCole Faust    conv->configure(&src_info, &weight_info, &bias_info, &dst_info, conv_info, weights_info);
*c217d954SCole Faust
*c217d954SCole Faust    // tensors are newly created every call of this lambda function
*c217d954SCole Faust    auto src    = create_tensor<Tensor>(src_info);
*c217d954SCole Faust    auto weight = create_tensor<Tensor>(weight_info);
*c217d954SCole Faust    auto bias   = create_tensor<Tensor>(bias_info);
*c217d954SCole Faust    src.allocator()->allocate();
*c217d954SCole Faust    weight.allocator()->allocate();
*c217d954SCole Faust    bias.allocator()->allocate();
*c217d954SCole Faust
*c217d954SCole Faust    ITensorPack run_pack{ { TensorType::ACL_SRC_0, &src }, { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } };
*c217d954SCole Faust    ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } };
*c217d954SCole Faust
*c217d954SCole Faust    auto mg = MemoryGroup{};
*c217d954SCole Faust    auto ws = manage_workspace<Tensor>(conv->workspace(), mg, run_pack, prep_pack);
*c217d954SCole Faust
*c217d954SCole Faust    auto run_conv = [&]() -> Tensor
*c217d954SCole Faust    {
*c217d954SCole Faust        auto dst = create_tensor<Tensor>(dst_info);
*c217d954SCole Faust        dst.allocator()->allocate();
*c217d954SCole Faust        run_pack.add_tensor(TensorType::ACL_DST, &dst);
*c217d954SCole Faust
*c217d954SCole Faust        library->fill_tensor_value(Accessor(src), 1.f);
*c217d954SCole Faust        library->fill_tensor_value(Accessor(weight), 2.f);
*c217d954SCole Faust        library->fill_tensor_value(Accessor(bias), 3.f);
*c217d954SCole Faust        // This operator is configured once and captured by this lambda.
*c217d954SCole Faust        conv->prepare(prep_pack);
*c217d954SCole Faust        conv->run(run_pack);
*c217d954SCole Faust        return dst;
*c217d954SCole Faust    };
*c217d954SCole Faust    auto result_0 = run_conv();
*c217d954SCole Faust    auto result_1 = run_conv();
*c217d954SCole Faust    for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
*c217d954SCole Faust    {
*c217d954SCole Faust        ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
*c217d954SCole Faust    }
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole Faust/** Test case for memory injection in @ref NEGEMMConvolutionLayer.
*c217d954SCole Faust *
*c217d954SCole Faust * Make sure @ref NEGEMMConvolutionLayer still works through injecting the memory at configure time using the old API.
*c217d954SCole Faust *
*c217d954SCole Faust * Checks performed in order:
*c217d954SCole Faust * - Both runs compute the same output
*c217d954SCole Faust */
*c217d954SCole FaustTEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL)
*c217d954SCole Faust{
*c217d954SCole Faust    auto        conv        = std::make_unique<NEGEMMConvolutionLayer>();
*c217d954SCole Faust    const auto  src_info    = TensorInfo(TensorShape(1U, 5U, 2U), 1, DataType::F32, DataLayout::NCHW);
*c217d954SCole Faust    const auto  weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NCHW);
*c217d954SCole Faust    const auto  bias_info   = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NCHW);
*c217d954SCole Faust    auto        dst_info    = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NCHW);
*c217d954SCole Faust    const auto  conv_info   = PadStrideInfo(1, 1, 0, 0, 2, 2, DimensionRoundingType::FLOOR);
*c217d954SCole Faust    WeightsInfo weights_info(false, 3U, 3U, 1U);
*c217d954SCole Faust    auto        run_conv = [&]()
*c217d954SCole Faust    {
*c217d954SCole Faust        auto src    = create_tensor<Tensor>(src_info);
*c217d954SCole Faust        auto weight = create_tensor<Tensor>(weight_info);
*c217d954SCole Faust        auto bias   = create_tensor<Tensor>(bias_info);
*c217d954SCole Faust        auto dst    = create_tensor<Tensor>(dst_info);
*c217d954SCole Faust        conv->configure(&src, &weight, &bias, &dst, conv_info, weights_info);
*c217d954SCole Faust        src.allocator()->allocate();
*c217d954SCole Faust        weight.allocator()->allocate();
*c217d954SCole Faust        bias.allocator()->allocate();
*c217d954SCole Faust        dst.allocator()->allocate();
*c217d954SCole Faust        library->fill_tensor_value(Accessor(src), 1.f);
*c217d954SCole Faust        library->fill_tensor_value(Accessor(weight), 2.f);
*c217d954SCole Faust        library->fill_tensor_value(Accessor(bias), 3.f);
*c217d954SCole Faust        conv->run();
*c217d954SCole Faust        return dst;
*c217d954SCole Faust    };
*c217d954SCole Faust    auto result_0 = run_conv();
*c217d954SCole Faust    auto result_1 = run_conv();
*c217d954SCole Faust    for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
*c217d954SCole Faust    {
*c217d954SCole Faust        ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
*c217d954SCole Faust    }
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE(Float)
*c217d954SCole Faust#if defined(ARM_COMPUTE_ENABLE_BF16)
*c217d954SCole FaustTEST_SUITE(BFLOAT16)
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
*c217d954SCole Faust                                                                                                                    framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::BFLOAT16)), framework::dataset::make("DataLayout", { DataLayout::NHWC })),
*c217d954SCole Faust                                                                                                            ActivationFunctionsDataset))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output
*c217d954SCole Faust    validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
*c217d954SCole Faust}
*c217d954SCole FaustTEST_SUITE_END() // BFLOAT16
*c217d954SCole Faust#endif           /* defined(ARM_COMPUTE_ENABLE_BF16) */
*c217d954SCole Faust
*c217d954SCole Faust#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
*c217d954SCole FaustTEST_SUITE(FP16)
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
*c217d954SCole Faust                                                                                                                   framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", { DataLayout::NCHW })), ActivationFunctionsDataset))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output
*c217d954SCole Faust    validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16);
*c217d954SCole Faust}
*c217d954SCole FaustTEST_SUITE_END() // FP16
*c217d954SCole Faust#endif           /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE(FP32)
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
*c217d954SCole Faust                                                                                                                    framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
*c217d954SCole Faust                                                                                                            ActivationFunctionsDataset))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output
*c217d954SCole Faust    validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
*c217d954SCole Faust}
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEGEMMConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::ALL,
*c217d954SCole Faust                       combine(combine(combine(combine(combine(combine(combine(combine(combine(
*c217d954SCole Faust                                                                                           framework::dataset::make("Input", TensorShape(23U, 27U, 5U)),
*c217d954SCole Faust                                                                                           framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))),
*c217d954SCole Faust                                                                                       framework::dataset::make("Bias", TensorShape(2U))),
*c217d954SCole Faust                                                                               framework::dataset::make("Output", TensorShape(11U, 25U, 2U))),
*c217d954SCole Faust                                                                       framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))),
*c217d954SCole Faust                                                               framework::dataset::make("Dilation", Size2D(1, 1))),
*c217d954SCole Faust                                                       framework::dataset::make("ReshapeWeights", { true })),
*c217d954SCole Faust                                               framework::dataset::make("DataType", DataType::F32)),
*c217d954SCole Faust                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
*c217d954SCole Faust                               ActivationFunctionsDataset))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output
*c217d954SCole Faust    validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
*c217d954SCole Faust}
*c217d954SCole FaustTEST_SUITE_END() // FP32
*c217d954SCole FaustTEST_SUITE_END() // Float
*c217d954SCole Faust
*c217d954SCole Fausttemplate <typename T>
*c217d954SCole Faustusing NEGEMMConvolutionLayerQuantizedFixture = ConvolutionValidationQuantizedFixture<Tensor, Accessor, NEConvolutionLayer, T>;
*c217d954SCole Fausttemplate <typename T>
*c217d954SCole Faustusing NEGEMMConvolutionLayerQuantizedMixedDataLayoutFixture = ConvolutionValidationQuantizedFixture<Tensor, Accessor, NEConvolutionLayer, T, true>;
*c217d954SCole Faust
*c217d954SCole Fausttemplate <typename T>
*c217d954SCole Faustusing NEGEMMConvolutionLayerQuantizedPerChannelFixture = ConvolutionValidationQuantizedPerChannelFixture<Tensor, Accessor, NEConvolutionLayer, T, int8_t>;
*c217d954SCole Faust
*c217d954SCole Faustconst auto QuantizedActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
*c217d954SCole Faust{
*c217d954SCole Faust    ActivationLayerInfo(),
*c217d954SCole Faust    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
*c217d954SCole Faust    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)
*c217d954SCole Faust});
*c217d954SCole FaustTEST_SUITE(Quantized)
*c217d954SCole FaustTEST_SUITE(QASYMM8)
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
*c217d954SCole Faust                                                                                                                       framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
*c217d954SCole Faust                                                                                                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), QuantizedActivationFunctionsDataset))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output
*c217d954SCole Faust    validate(Accessor(_target), _reference, tolerance_qasymm8);
*c217d954SCole Faust}
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL,
*c217d954SCole Faust                       combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
*c217d954SCole Faust                                                                                                   framework::dataset::make("Input", TensorShape(23U, 27U, 5U)),
*c217d954SCole Faust                                                                                                   framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))),
*c217d954SCole Faust                                                                                               framework::dataset::make("Bias", TensorShape(2U))),
*c217d954SCole Faust                                                                                       framework::dataset::make("Output", TensorShape(11U, 25U, 2U))),
*c217d954SCole Faust                                                                               framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))),
*c217d954SCole Faust                                                                       framework::dataset::make("Dilation", Size2D(1, 1))),
*c217d954SCole Faust                                                               framework::dataset::make("ReshapeWeights", { true })),
*c217d954SCole Faust                                                       framework::dataset::make("DataType", DataType::QASYMM8)),
*c217d954SCole Faust                                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
*c217d954SCole Faust                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
*c217d954SCole Faust                               QuantizedActivationFunctionsDataset))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output
*c217d954SCole Faust    validate(Accessor(_target), _reference, tolerance_qasymm8);
*c217d954SCole Faust}
*c217d954SCole FaustTEST_SUITE_END() // QASYMM8
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE(QASYMM8_SIGNED)
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
*c217d954SCole Faust                                                                                                                      framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
*c217d954SCole Faust                                                                                                                      framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.01f, -10) })), QuantizedActivationFunctionsDataset))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output
*c217d954SCole Faust    validate(Accessor(_target), _reference, tolerance_qasymm8);
*c217d954SCole Faust}
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEGEMMConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL,
*c217d954SCole Faust                       combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
*c217d954SCole Faust                                                                                                   framework::dataset::make("Input", TensorShape(23U, 27U, 5U)),
*c217d954SCole Faust                                                                                                   framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))),
*c217d954SCole Faust                                                                                               framework::dataset::make("Bias", TensorShape(2U))),
*c217d954SCole Faust                                                                                       framework::dataset::make("Output", TensorShape(11U, 25U, 2U))),
*c217d954SCole Faust                                                                               framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))),
*c217d954SCole Faust                                                                       framework::dataset::make("Dilation", Size2D(1, 1))),
*c217d954SCole Faust                                                               framework::dataset::make("ReshapeWeights", { true })),
*c217d954SCole Faust                                                       framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
*c217d954SCole Faust                                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
*c217d954SCole Faust                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
*c217d954SCole Faust                               QuantizedActivationFunctionsDataset))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output
*c217d954SCole Faust    validate(Accessor(_target), _reference, tolerance_qasymm8);
*c217d954SCole Faust}
*c217d954SCole FaustTEST_SUITE_END() // QASYMM8_SIGNED
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE(QSYMM8_PER_CHANNEL)
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::ALL,
*c217d954SCole Faust                       combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
*c217d954SCole Faust                                                                       framework::dataset::make("ReshapeWeights", { true })),
*c217d954SCole Faust                                                               framework::dataset::make("DataType", { DataType::QASYMM8 })),
*c217d954SCole Faust                                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
*c217d954SCole Faust                                               QuantizationData),
*c217d954SCole Faust                                       QuantizedActivationFunctionsDataset),
*c217d954SCole Faust                               framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output
*c217d954SCole Faust    validate(Accessor(_target), _reference, tolerance_qasymm8);
*c217d954SCole Faust}
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunSmallSigned, NEGEMMConvolutionLayerQuantizedPerChannelFixture<int8_t>, framework::DatasetMode::ALL,
*c217d954SCole Faust                       combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
*c217d954SCole Faust                                                                       framework::dataset::make("ReshapeWeights", { true })),
*c217d954SCole Faust                                                               framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED })),
*c217d954SCole Faust                                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
*c217d954SCole Faust                                               QuantizationData),
*c217d954SCole Faust                                       QuantizedActivationFunctionsDataset),
*c217d954SCole Faust                               framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output
*c217d954SCole Faust    validate(Accessor(_target), _reference, tolerance_qasymm8);
*c217d954SCole Faust}
*c217d954SCole FaustTEST_SUITE_END() // QSYMM8_PER_CHANNEL
*c217d954SCole FaustTEST_SUITE_END() // Quantized
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE_END() // GEMMConvolutionLayer
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE(DirectGEMMConv2d)
*c217d954SCole Fausttemplate <typename T>
*c217d954SCole Faustusing NEDirectGEMMConv2dLayerFixture = ConvolutionValidationFixture<Tensor, Accessor, NEGEMMConv2d, T>;
*c217d954SCole Faust
*c217d954SCole Faust/** Test case for memory injection in @ref cpu::CpuGemmDirectConv2d.
*c217d954SCole Faust *
*c217d954SCole Faust * Configure the operator once and inject memory at run-time in multiple executions.
*c217d954SCole Faust *
*c217d954SCole Faust * Checks performed in order:
*c217d954SCole Faust * - Both runs compute the same output
*c217d954SCole Faust */
*c217d954SCole FaustTEST_CASE(MemoryInjection, framework::DatasetMode::ALL)
*c217d954SCole Faust{
*c217d954SCole Faust    auto       conv        = std::make_unique<cpu::CpuGemmDirectConv2d>();
*c217d954SCole Faust    const auto src_info    = TensorInfo(TensorShape(1U, 5U, 2U), 1, DataType::F32, DataLayout::NHWC);
*c217d954SCole Faust    const auto weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NHWC);
*c217d954SCole Faust    const auto bias_info   = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NHWC);
*c217d954SCole Faust    auto       dst_info    = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NHWC);
*c217d954SCole Faust    const auto conv_info   = Conv2dInfo{};
*c217d954SCole Faust    conv->configure(&src_info, &weight_info, &bias_info, &dst_info, conv_info);
*c217d954SCole Faust
*c217d954SCole Faust    // tensors are newly created every call of this lambda function
*c217d954SCole Faust    auto src    = create_tensor<Tensor>(src_info);
*c217d954SCole Faust    auto weight = create_tensor<Tensor>(weight_info);
*c217d954SCole Faust    auto bias   = create_tensor<Tensor>(bias_info);
*c217d954SCole Faust    src.allocator()->allocate();
*c217d954SCole Faust    weight.allocator()->allocate();
*c217d954SCole Faust    bias.allocator()->allocate();
*c217d954SCole Faust
*c217d954SCole Faust    ITensorPack run_pack{ { TensorType::ACL_SRC_0, &src }, { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } };
*c217d954SCole Faust    ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } };
*c217d954SCole Faust
*c217d954SCole Faust    auto mg = MemoryGroup{};
*c217d954SCole Faust    auto ws = manage_workspace<Tensor>(conv->workspace(), mg, run_pack, prep_pack);
*c217d954SCole Faust
*c217d954SCole Faust    auto run_conv = [&]() -> Tensor
*c217d954SCole Faust    {
*c217d954SCole Faust        auto dst = create_tensor<Tensor>(dst_info);
*c217d954SCole Faust        dst.allocator()->allocate();
*c217d954SCole Faust        run_pack.add_tensor(TensorType::ACL_DST, &dst);
*c217d954SCole Faust
*c217d954SCole Faust        library->fill_tensor_value(Accessor(src), 1.f);
*c217d954SCole Faust        library->fill_tensor_value(Accessor(weight), 2.f);
*c217d954SCole Faust        library->fill_tensor_value(Accessor(bias), 3.f);
*c217d954SCole Faust        // This operator is configured once and captured by this lambda.
*c217d954SCole Faust        conv->prepare(prep_pack);
*c217d954SCole Faust        conv->run(run_pack);
*c217d954SCole Faust        return dst;
*c217d954SCole Faust    };
*c217d954SCole Faust    auto result_0 = run_conv();
*c217d954SCole Faust    auto result_1 = run_conv();
*c217d954SCole Faust    for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
*c217d954SCole Faust    {
*c217d954SCole Faust        ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
*c217d954SCole Faust    }
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole Faust/** Test case for memory injection in @ref NEGEMMConv2d.
*c217d954SCole Faust *
*c217d954SCole Faust * Make sure @ref NEGEMMConv2d still works through injecting the memory at configure time using the old API.
*c217d954SCole Faust *
*c217d954SCole Faust * Checks performed in order:
*c217d954SCole Faust * - Both runs compute the same output
*c217d954SCole Faust */
*c217d954SCole FaustTEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL)
*c217d954SCole Faust{
*c217d954SCole Faust    auto       conv        = std::make_unique<NEGEMMConv2d>();
*c217d954SCole Faust    const auto src_info    = TensorInfo(TensorShape(1U, 5U, 2U), 1, DataType::F32, DataLayout::NHWC);
*c217d954SCole Faust    const auto weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NHWC);
*c217d954SCole Faust    const auto bias_info   = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NHWC);
*c217d954SCole Faust    auto       dst_info    = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NHWC);
*c217d954SCole Faust    const auto conv_info   = Conv2dInfo{};
*c217d954SCole Faust    auto       run_conv    = [&]()
*c217d954SCole Faust    {
*c217d954SCole Faust        auto src    = create_tensor<Tensor>(src_info);
*c217d954SCole Faust        auto weight = create_tensor<Tensor>(weight_info);
*c217d954SCole Faust        auto bias   = create_tensor<Tensor>(bias_info);
*c217d954SCole Faust        auto dst    = create_tensor<Tensor>(dst_info);
*c217d954SCole Faust        conv->configure(&src, &weight, &bias, &dst, conv_info);
*c217d954SCole Faust        src.allocator()->allocate();
*c217d954SCole Faust        weight.allocator()->allocate();
*c217d954SCole Faust        bias.allocator()->allocate();
*c217d954SCole Faust        dst.allocator()->allocate();
*c217d954SCole Faust        library->fill_tensor_value(Accessor(src), 1.f);
*c217d954SCole Faust        library->fill_tensor_value(Accessor(weight), 2.f);
*c217d954SCole Faust        library->fill_tensor_value(Accessor(bias), 3.f);
*c217d954SCole Faust        conv->run();
*c217d954SCole Faust        return dst;
*c217d954SCole Faust    };
*c217d954SCole Faust    auto result_0 = run_conv();
*c217d954SCole Faust    auto result_1 = run_conv();
*c217d954SCole Faust    for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
*c217d954SCole Faust    {
*c217d954SCole Faust        ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
*c217d954SCole Faust    }
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE(Float)
*c217d954SCole FaustTEST_SUITE(FP32)
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunSmall, NEDirectGEMMConv2dLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
*c217d954SCole Faust                                                                                                                     framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", { DataLayout::NHWC })), ActivationFunctionsDataset))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output
*c217d954SCole Faust    validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
*c217d954SCole Faust}
*c217d954SCole FaustTEST_SUITE_END() // FP32
*c217d954SCole FaustTEST_SUITE_END() // Float
*c217d954SCole Faust
*c217d954SCole Faust#ifdef __aarch64__
*c217d954SCole Fausttemplate <typename T>
*c217d954SCole Faustusing NEDirectGEMMConv2dLayerQuantizedFixture = ConvolutionValidationQuantizedFixture<Tensor, Accessor, NEGEMMConv2d, T>;
*c217d954SCole Faust
*c217d954SCole Fausttemplate <typename T>
*c217d954SCole Faustusing NEDirectGEMMConv2dLayerQuantizedPerChannelFixture = ConvolutionValidationQuantizedPerChannelFixture<Tensor, Accessor, NEGEMMConv2d, T, int8_t>;
*c217d954SCole Faust
*c217d954SCole Faustconst auto QuantizedActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
*c217d954SCole Faust{
*c217d954SCole Faust    ActivationLayerInfo(),
*c217d954SCole Faust    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
*c217d954SCole Faust    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)
*c217d954SCole Faust});
*c217d954SCole FaustTEST_SUITE(Quantized)
// QASYMM8 suite: one "RunSmall" case for NEDirectGEMMConv2dLayerQuantizedFixture<uint8_t>,
// registered for DatasetMode::ALL.
// Dataset: SmallConvolutionLayerDataset combined with ReshapeWeights = true, DataType = QASYMM8,
// DataLayout = NHWC, a single QuantizationInfo(2.f / 255.f, 10) and QuantizedActivationFunctionsDataset.
// NOTE(review): the QuantizationInfo arguments are presumably (scale, offset) - confirm.
*c217d954SCole FaustTEST_SUITE(QASYMM8)
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunSmall, NEDirectGEMMConv2dLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
*c217d954SCole Faust                                                                                                                        framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("DataLayout", { DataLayout::NHWC })),
*c217d954SCole Faust                                                                                                                        framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), QuantizedActivationFunctionsDataset))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output: compare _target (through Accessor) with _reference using tolerance_qasymm8
*c217d954SCole Faust    validate(Accessor(_target), _reference, tolerance_qasymm8);
*c217d954SCole Faust}
*c217d954SCole FaustTEST_SUITE_END() // QASYMM8
*c217d954SCole Faust
// QASYMM8_SIGNED suite: one "RunSmall" case for NEDirectGEMMConv2dLayerQuantizedFixture<int8_t>,
// registered for DatasetMode::ALL.
// Dataset: SmallConvolutionLayerDataset combined with ReshapeWeights = true, DataType = QASYMM8_SIGNED,
// DataLayout = NHWC, a single QuantizationInfo(0.01f, -10) and QuantizedActivationFunctionsDataset.
// NOTE(review): the QuantizationInfo arguments are presumably (scale, offset) - confirm.
*c217d954SCole FaustTEST_SUITE(QASYMM8_SIGNED)
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunSmall, NEDirectGEMMConv2dLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
*c217d954SCole Faust                                                                                                                       framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("DataLayout", { DataLayout::NHWC })),
*c217d954SCole Faust                                                                                                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.01f, -10) })), QuantizedActivationFunctionsDataset))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output: compare _target (through Accessor) with _reference; the signed case reuses tolerance_qasymm8
*c217d954SCole Faust    validate(Accessor(_target), _reference, tolerance_qasymm8);
*c217d954SCole Faust}
*c217d954SCole FaustTEST_SUITE_END() // QASYMM8_SIGNED
*c217d954SCole Faust
// QSYMM8_PER_CHANNEL suite: one "RunSmallSigned" case for
// NEDirectGEMMConv2dLayerQuantizedPerChannelFixture<int8_t>, registered for DatasetMode::ALL.
// Dataset: SmallConvolutionLayerDataset combined with ReshapeWeights = true, DataType = QASYMM8_SIGNED,
// DataLayout = NHWC, QuantizationData (defined outside this excerpt), QuantizedActivationFunctionsDataset
// and WeightsDataType = QSYMM8_PER_CHANNEL.
// NOTE(review): WeightsDataType is combined last, after the activation dataset; the combine() nesting
// order presumably matches the fixture's setup argument order - confirm before reordering.
*c217d954SCole FaustTEST_SUITE(QSYMM8_PER_CHANNEL)
*c217d954SCole FaustFIXTURE_DATA_TEST_CASE(RunSmallSigned, NEDirectGEMMConv2dLayerQuantizedPerChannelFixture<int8_t>, framework::DatasetMode::ALL,
*c217d954SCole Faust                       combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
*c217d954SCole Faust                                                                       framework::dataset::make("ReshapeWeights", { true })),
*c217d954SCole Faust                                                               framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED })),
*c217d954SCole Faust                                                       framework::dataset::make("DataLayout", { DataLayout::NHWC })),
*c217d954SCole Faust                                               QuantizationData),
*c217d954SCole Faust                                       QuantizedActivationFunctionsDataset),
*c217d954SCole Faust                               framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
*c217d954SCole Faust{
*c217d954SCole Faust    // Validate output: compare _target (through Accessor) with _reference; the per-channel case reuses tolerance_qasymm8
*c217d954SCole Faust    validate(Accessor(_target), _reference, tolerance_qasymm8);
*c217d954SCole Faust}
*c217d954SCole FaustTEST_SUITE_END() // QSYMM8_PER_CHANNEL
*c217d954SCole FaustTEST_SUITE_END() // Quantized
*c217d954SCole Faust#endif           // __aarch64__
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE_END() // DirectGEMMConv2d
*c217d954SCole Faust
*c217d954SCole FaustTEST_SUITE_END() // Neon
*c217d954SCole Faust} // namespace validation
*c217d954SCole Faust} // namespace test
*c217d954SCole Faust} // namespace arm_compute