NEON/functions/NEFFTConvolutionLayer.cpp

/*
 * Copyright (c) 2019-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
*c217d954SCole Faust#include "arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h"
*c217d954SCole Faust
*c217d954SCole Faust#include "arm_compute/core/ITensor.h"
*c217d954SCole Faust#include "arm_compute/core/Utils.h"
*c217d954SCole Faust#include "arm_compute/core/Validate.h"
*c217d954SCole Faust#include "arm_compute/core/utils/misc/ShapeCalculator.h"
*c217d954SCole Faust#include "src/common/utils/Log.h"
*c217d954SCole Faust#include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h"
*c217d954SCole Faust#include "src/core/NEON/kernels/NEFFTRadixStageKernel.h"
*c217d954SCole Faust#include "src/core/NEON/kernels/NEFFTScaleKernel.h"
*c217d954SCole Faust#include "src/core/NEON/kernels/NEPadLayerKernel.h"
*c217d954SCole Faust#include "src/core/NEON/kernels/NEReductionOperationKernel.h"
*c217d954SCole Faust#include "src/core/helpers/AutoConfiguration.h"
*c217d954SCole Faust#include "src/core/utils/helpers/fft.h"
*c217d954SCole Faust
*c217d954SCole Faustnamespace arm_compute
*c217d954SCole Faust{
*c217d954SCole Faustnamespace
*c217d954SCole Faust{
*c217d954SCole Faustint pad_decomposable(int N)
*c217d954SCole Faust{
*c217d954SCole Faust    const auto supported_radix = NEFFTRadixStageKernel::supported_radix();
*c217d954SCole Faust
*c217d954SCole Faust    int  pad           = 0;
*c217d954SCole Faust    bool is_decomposed = false;
*c217d954SCole Faust    while(!is_decomposed)
*c217d954SCole Faust    {
*c217d954SCole Faust        const auto decomposed_vector = arm_compute::helpers::fft::decompose_stages(N++, supported_radix);
*c217d954SCole Faust        is_decomposed                = !decomposed_vector.empty();
*c217d954SCole Faust        if(!is_decomposed)
*c217d954SCole Faust        {
*c217d954SCole Faust            ++pad;
*c217d954SCole Faust        }
*c217d954SCole Faust    }
*c217d954SCole Faust    return pad;
*c217d954SCole Faust}
*c217d954SCole Faust} // namespace
*c217d954SCole Faust
*c217d954SCole FaustNEFFTConvolutionLayer::NEFFTConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
*c217d954SCole Faust    : _memory_group(memory_manager),
*c217d954SCole Faust      _flip_weights_func(),
*c217d954SCole Faust      _permute_input_func(),
*c217d954SCole Faust      _permute_output_func(),
*c217d954SCole Faust      _permute_weights_func(),
*c217d954SCole Faust      _permute_bias_func(),
*c217d954SCole Faust      _pad_input_func(),
*c217d954SCole Faust      _pad_weights_func(),
*c217d954SCole Faust      _transform_input_func(memory_manager),
*c217d954SCole Faust      _transform_weights_func(),
*c217d954SCole Faust      _itransform_output_func(memory_manager),
*c217d954SCole Faust      _prod_func(),
*c217d954SCole Faust      _reduce_func(),
*c217d954SCole Faust      _extract_output_func(),
*c217d954SCole Faust      _bias_add_func(),
*c217d954SCole Faust      _activation_layer_func(),
*c217d954SCole Faust      _permuted_input(),
*c217d954SCole Faust      _permuted_weights(),
*c217d954SCole Faust      _permuted_bias(),
*c217d954SCole Faust      _permuted_output(),
*c217d954SCole Faust      _padded_input(),
*c217d954SCole Faust      _padded_weights(),
*c217d954SCole Faust      _flip_axis(),
*c217d954SCole Faust      _flipped_weights(),
*c217d954SCole Faust      _transformed_input(),
*c217d954SCole Faust      _transformed_weights(),
*c217d954SCole Faust      _input_weights_product(),
*c217d954SCole Faust      _output_product(),
*c217d954SCole Faust      _output_reduced(),
*c217d954SCole Faust      _itransformed_output(),
*c217d954SCole Faust      _reshaped_output(),
*c217d954SCole Faust      _bias_output(),
*c217d954SCole Faust      _original_weights(nullptr),
*c217d954SCole Faust      _original_bias(nullptr),
*c217d954SCole Faust      _is_activationlayer_enabled(false),
*c217d954SCole Faust      _needs_permute(false),
*c217d954SCole Faust      _has_bias(false),
*c217d954SCole Faust      _is_prepared(false)
*c217d954SCole Faust{
*c217d954SCole Faust}
*c217d954SCole FaustNEFFTConvolutionLayer::~NEFFTConvolutionLayer() = default;
*c217d954SCole Faust
*c217d954SCole Faustvoid NEFFTConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
*c217d954SCole Faust                                      const ActivationLayerInfo &act_info, bool enable_fast_math)
*c217d954SCole Faust{
*c217d954SCole Faust    ARM_COMPUTE_UNUSED(enable_fast_math);
*c217d954SCole Faust    ARM_COMPUTE_LOG_PARAMS(input, weights, biases, output, conv_info, act_info, enable_fast_math);
*c217d954SCole Faust
*c217d954SCole Faust    _original_weights = weights;
*c217d954SCole Faust    _original_bias    = biases;
*c217d954SCole Faust
*c217d954SCole Faust    // Flat if bias addition is required
*c217d954SCole Faust    _has_bias = biases != nullptr;
*c217d954SCole Faust
*c217d954SCole Faust    // Get indices for the width and height
*c217d954SCole Faust    const size_t idx_width  = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::WIDTH);
*c217d954SCole Faust    const size_t idx_height = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT);
*c217d954SCole Faust
*c217d954SCole Faust    // Input shape, kernel size and output tile
*c217d954SCole Faust    const Size2D input_dims  = Size2D(input->info()->tensor_shape()[idx_width], input->info()->tensor_shape()[idx_height]);
*c217d954SCole Faust    const Size2D kernel_size = Size2D(weights->info()->tensor_shape()[idx_width], weights->info()->tensor_shape()[idx_height]);
*c217d954SCole Faust    const Size2D pad_valid   = Size2D(pad_decomposable(input_dims.x() + kernel_size.x() - 1),
*c217d954SCole Faust                                      pad_decomposable(input_dims.y() + kernel_size.y() - 1));
*c217d954SCole Faust    // Tensors to use
*c217d954SCole Faust    ITensor       *input_to_use   = input;
*c217d954SCole Faust    const ITensor *weights_to_use = weights;
*c217d954SCole Faust    ITensor       *output_to_use  = _has_bias ? &_bias_output : output;
*c217d954SCole Faust
*c217d954SCole Faust    // Permute bias
*c217d954SCole Faust    if(biases != nullptr)
*c217d954SCole Faust    {
*c217d954SCole Faust        _permute_bias_func.configure(biases, &_permuted_bias, PermutationVector(1U, 2U, 0U));
*c217d954SCole Faust        _permuted_bias.info()->set_data_layout(DataLayout::NCHW);
*c217d954SCole Faust    }
*c217d954SCole Faust
*c217d954SCole Faust    // Permute input if needed
*c217d954SCole Faust    _needs_permute = input->info()->data_layout() == DataLayout::NHWC;
*c217d954SCole Faust    if(_needs_permute)
*c217d954SCole Faust    {
*c217d954SCole Faust        _memory_group.manage(&_permuted_input);
*c217d954SCole Faust        // Configure the function to transform the input tensor from NHWC -> NCHW
*c217d954SCole Faust        _permute_input_func.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));
*c217d954SCole Faust        _permuted_input.info()->set_data_layout(DataLayout::NCHW);
*c217d954SCole Faust
*c217d954SCole Faust        // Configure the function to transform the weights tensor from HWI -> IHW
*c217d954SCole Faust        _permute_weights_func.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
*c217d954SCole Faust        _permuted_weights.info()->set_data_layout(DataLayout::NCHW);
*c217d954SCole Faust
*c217d954SCole Faust        input_to_use   = &_permuted_input;
*c217d954SCole Faust        weights_to_use = &_permuted_weights;
*c217d954SCole Faust    }
*c217d954SCole Faust
*c217d954SCole Faust    // Flip weights
*c217d954SCole Faust    _flipped_weights.allocator()->init(weights_to_use->info()->clone()->set_is_resizable(true).reset_padding());
*c217d954SCole Faust    _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32));
*c217d954SCole Faust    _flip_weights_func.configure(weights_to_use, &_flipped_weights, &_flip_axis);
*c217d954SCole Faust
*c217d954SCole Faust    // Pad weights
*c217d954SCole Faust    const PaddingList padding_w = { { 0, input_dims.x() + pad_valid.x() - 1 }, { 0, input_dims.y() + pad_valid.y() - 1 } };
*c217d954SCole Faust    _pad_weights_func.configure(&_flipped_weights, &_padded_weights, padding_w);
*c217d954SCole Faust
*c217d954SCole Faust    // Transform weights
*c217d954SCole Faust    _transform_weights_func = std::make_unique<NEFFT2D>();
*c217d954SCole Faust    _transform_weights_func->configure(&_padded_weights, &_transformed_weights, FFT2DInfo());
*c217d954SCole Faust
*c217d954SCole Faust    // Pad input
*c217d954SCole Faust    const PaddingList padding_in = { { 0, kernel_size.x() + pad_valid.x() - 1 }, { 0, kernel_size.y() + pad_valid.y() - 1 } };
*c217d954SCole Faust    _memory_group.manage(&_padded_input);
*c217d954SCole Faust    _pad_input_func.configure(input_to_use, &_padded_input, padding_in);
*c217d954SCole Faust    if(_needs_permute)
*c217d954SCole Faust    {
*c217d954SCole Faust        _permuted_input.allocator()->allocate();
*c217d954SCole Faust    }
*c217d954SCole Faust
*c217d954SCole Faust    // Transform input
*c217d954SCole Faust    _memory_group.manage(&_transformed_input);
*c217d954SCole Faust    _transform_input_func.configure(&_padded_input, &_transformed_input, FFT2DInfo());
*c217d954SCole Faust    _padded_input.allocator()->allocate();
*c217d954SCole Faust
*c217d954SCole Faust    // Perform product
*c217d954SCole Faust    _memory_group.manage(&_output_product);
*c217d954SCole Faust    _prod_func.configure(&_transformed_input, &_transformed_weights, &_output_product);
*c217d954SCole Faust    _transformed_input.allocator()->allocate();
*c217d954SCole Faust
*c217d954SCole Faust    // Perform reduction
*c217d954SCole Faust    _memory_group.manage(&_output_reduced);
*c217d954SCole Faust    _reduce_func.configure(&_output_product, &_output_reduced, 2, ReductionOperation::SUM);
*c217d954SCole Faust    _output_product.allocator()->allocate();
*c217d954SCole Faust
*c217d954SCole Faust    // Transform output
*c217d954SCole Faust    _memory_group.manage(&_itransformed_output);
*c217d954SCole Faust    FFT2DInfo itranform_info;
*c217d954SCole Faust    itranform_info.direction = FFTDirection::Inverse;
*c217d954SCole Faust    _itransformed_output.allocator()->init(_output_reduced.info()->clone()->set_is_resizable(true).set_num_channels(1).reset_padding());
*c217d954SCole Faust    _itransform_output_func.configure(&_output_reduced, &_itransformed_output, itranform_info);
*c217d954SCole Faust    _output_reduced.allocator()->allocate();
*c217d954SCole Faust
*c217d954SCole Faust    // Reshape output
*c217d954SCole Faust    TensorShape reshaped_shape = _itransformed_output.info()->tensor_shape();
*c217d954SCole Faust    reshaped_shape.remove_dimension(2);
*c217d954SCole Faust    _reshaped_output.allocator()->init(_itransformed_output.info()->clone()->set_tensor_shape(reshaped_shape));
*c217d954SCole Faust
*c217d954SCole Faust    // Extract correct region
*c217d954SCole Faust    const int start_left = kernel_size.x() - conv_info.pad_left() - 1;
*c217d954SCole Faust    const int start_top  = kernel_size.y() - conv_info.pad_top() - 1;
*c217d954SCole Faust    const int end_right  = _reshaped_output.info()->tensor_shape().x() - (kernel_size.x() - conv_info.pad_right() - 1) - pad_valid.x();
*c217d954SCole Faust    const int end_botton = _reshaped_output.info()->tensor_shape().y() - (kernel_size.y() - conv_info.pad_bottom() - 1) - pad_valid.y();
*c217d954SCole Faust    if(_has_bias)
*c217d954SCole Faust    {
*c217d954SCole Faust        _memory_group.manage(&_bias_output);
*c217d954SCole Faust    }
*c217d954SCole Faust    else if(_needs_permute)
*c217d954SCole Faust    {
*c217d954SCole Faust        output_to_use = &_permuted_output;
*c217d954SCole Faust        _memory_group.manage(&_permuted_output);
*c217d954SCole Faust    }
*c217d954SCole Faust    _extract_output_func.configure(&_reshaped_output, output_to_use, Coordinates(start_left, start_top), Coordinates(end_right, end_botton));
*c217d954SCole Faust    _reshaped_output.allocator()->allocate();
*c217d954SCole Faust    _itransformed_output.allocator()->allocate();
*c217d954SCole Faust
*c217d954SCole Faust    // Add bias
*c217d954SCole Faust    if(biases != nullptr)
*c217d954SCole Faust    {
*c217d954SCole Faust        output_to_use = output;
*c217d954SCole Faust        if(_needs_permute)
*c217d954SCole Faust        {
*c217d954SCole Faust            output_to_use = &_permuted_output;
*c217d954SCole Faust            _memory_group.manage(&_permuted_output);
*c217d954SCole Faust        }
*c217d954SCole Faust        auto_init_if_empty(*output_to_use->info(), *_bias_output.info());
*c217d954SCole Faust        _bias_add_func.configure(&_bias_output, &_permuted_bias, output_to_use, ConvertPolicy::WRAP);
*c217d954SCole Faust        _bias_output.allocator()->allocate();
*c217d954SCole Faust    }
*c217d954SCole Faust
*c217d954SCole Faust    // Permute output
*c217d954SCole Faust    if(_needs_permute)
*c217d954SCole Faust    {
*c217d954SCole Faust        // Configure the function to transform the convoluted output to ACL's native ordering format NCHW
*c217d954SCole Faust        _permuted_output.info()->set_data_layout(DataLayout::NCHW);
*c217d954SCole Faust        _permute_output_func.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));
*c217d954SCole Faust
*c217d954SCole Faust        // Allocate tensors
*c217d954SCole Faust        _permuted_output.allocator()->allocate();
*c217d954SCole Faust    }
*c217d954SCole Faust
*c217d954SCole Faust    // Configure Activation Layer
*c217d954SCole Faust    _is_activationlayer_enabled = act_info.enabled();
*c217d954SCole Faust    if(_is_activationlayer_enabled)
*c217d954SCole Faust    {
*c217d954SCole Faust        _activation_layer_func.configure(output, nullptr, act_info);
*c217d954SCole Faust    }
*c217d954SCole Faust
*c217d954SCole Faust    // Setup flip axis data
*c217d954SCole Faust    _flip_axis.allocator()->allocate();
*c217d954SCole Faust
*c217d954SCole Faust    auto axis_data = reinterpret_cast<uint32_t *>(_flip_axis.buffer());
*c217d954SCole Faust    axis_data[0]   = 0;
*c217d954SCole Faust    axis_data[1]   = 1;
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole FaustStatus NEFFTConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
*c217d954SCole Faust                                       const ActivationLayerInfo &act_info, bool enable_fast_math)
*c217d954SCole Faust{
*c217d954SCole Faust    ARM_COMPUTE_UNUSED(enable_fast_math);
*c217d954SCole Faust
*c217d954SCole Faust    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
*c217d954SCole Faust    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
*c217d954SCole Faust
*c217d954SCole Faust    // Get indices for the width and height
*c217d954SCole Faust    const size_t idx_width  = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
*c217d954SCole Faust    const size_t idx_height = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
*c217d954SCole Faust
*c217d954SCole Faust    // Input shape, kernel size and output tile
*c217d954SCole Faust    const Size2D kernel_size = Size2D(weights->tensor_shape()[idx_width], weights->tensor_shape()[idx_height]);
*c217d954SCole Faust
*c217d954SCole Faust    // Strides
*c217d954SCole Faust    const auto strides = conv_info.stride();
*c217d954SCole Faust    ARM_COMPUTE_RETURN_ERROR_ON(strides.first != strides.second && strides.first != 1);
*c217d954SCole Faust    ARM_COMPUTE_RETURN_ERROR_ON(kernel_size.x() != kernel_size.y());
*c217d954SCole Faust    ARM_COMPUTE_RETURN_ERROR_ON(conv_info.pad_left() != (kernel_size.x() / 2) || conv_info.pad_right() != (kernel_size.x() / 2));
*c217d954SCole Faust    ARM_COMPUTE_RETURN_ERROR_ON(conv_info.pad_top() != (kernel_size.y() / 2) || conv_info.pad_bottom() != (kernel_size.y() / 2));
*c217d954SCole Faust
*c217d954SCole Faust    // Validate biases
*c217d954SCole Faust    if(biases != nullptr)
*c217d954SCole Faust    {
*c217d954SCole Faust        const size_t idx_channels = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
*c217d954SCole Faust        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
*c217d954SCole Faust        ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_channels] != biases->tensor_shape().x());
*c217d954SCole Faust    }
*c217d954SCole Faust
*c217d954SCole Faust    // Checks performed when output is configured
*c217d954SCole Faust    if((output != nullptr) && (output->total_size() != 0))
*c217d954SCole Faust    {
*c217d954SCole Faust        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
*c217d954SCole Faust        ARM_COMPUTE_RETURN_ERROR_ON((input->tensor_shape()[idx_height] != output->tensor_shape()[idx_height]) || (input->tensor_shape()[idx_width] != output->tensor_shape()[idx_width]));
*c217d954SCole Faust
*c217d954SCole Faust        // Validate Activation Layer
*c217d954SCole Faust        if(act_info.enabled())
*c217d954SCole Faust        {
*c217d954SCole Faust            ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
*c217d954SCole Faust        }
*c217d954SCole Faust    }
*c217d954SCole Faust
*c217d954SCole Faust    return Status{};
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole Faustvoid NEFFTConvolutionLayer::run()
*c217d954SCole Faust{
*c217d954SCole Faust    prepare();
*c217d954SCole Faust
*c217d954SCole Faust    MemoryGroupResourceScope scope_mg(_memory_group);
*c217d954SCole Faust
*c217d954SCole Faust    // Transform input
*c217d954SCole Faust    if(_needs_permute)
*c217d954SCole Faust    {
*c217d954SCole Faust        _permute_input_func.run();
*c217d954SCole Faust    }
*c217d954SCole Faust    _pad_input_func.run();
*c217d954SCole Faust    _transform_input_func.run();
*c217d954SCole Faust
*c217d954SCole Faust    // Perform operations to frequency domain
*c217d954SCole Faust    _prod_func.run();
*c217d954SCole Faust
*c217d954SCole Faust    _reduce_func.run();
*c217d954SCole Faust
*c217d954SCole Faust    // Transform output
*c217d954SCole Faust    _itransform_output_func.run();
*c217d954SCole Faust    _reshaped_output.allocator()->import_memory(_itransformed_output.buffer());
*c217d954SCole Faust    _extract_output_func.run();
*c217d954SCole Faust
*c217d954SCole Faust    // Add bias
*c217d954SCole Faust    if(_has_bias)
*c217d954SCole Faust    {
*c217d954SCole Faust        _bias_add_func.run();
*c217d954SCole Faust    }
*c217d954SCole Faust    if(_needs_permute)
*c217d954SCole Faust    {
*c217d954SCole Faust        _permute_output_func.run();
*c217d954SCole Faust    }
*c217d954SCole Faust
*c217d954SCole Faust    // Run activation layer
*c217d954SCole Faust    if(_is_activationlayer_enabled)
*c217d954SCole Faust    {
*c217d954SCole Faust        _activation_layer_func.run();
*c217d954SCole Faust    }
*c217d954SCole Faust}
*c217d954SCole Faust
*c217d954SCole Faustvoid NEFFTConvolutionLayer::prepare()
*c217d954SCole Faust{
*c217d954SCole Faust    if(!_is_prepared)
*c217d954SCole Faust    {
*c217d954SCole Faust        // Permute bias to NCHW
*c217d954SCole Faust        if(_original_bias != nullptr)
*c217d954SCole Faust        {
*c217d954SCole Faust            _permuted_bias.allocator()->allocate();
*c217d954SCole Faust            _permute_bias_func.run();
*c217d954SCole Faust            _original_bias->mark_as_unused();
*c217d954SCole Faust        }
*c217d954SCole Faust
*c217d954SCole Faust        const ITensor *cur_weights = _original_weights;
*c217d954SCole Faust
*c217d954SCole Faust        // Permute weights
*c217d954SCole Faust        if(_needs_permute)
*c217d954SCole Faust        {
*c217d954SCole Faust            ARM_COMPUTE_ERROR_ON(!cur_weights->is_used());
*c217d954SCole Faust
*c217d954SCole Faust            _permuted_weights.allocator()->allocate();
*c217d954SCole Faust            _permute_weights_func.run();
*c217d954SCole Faust            cur_weights->mark_as_unused();
*c217d954SCole Faust            cur_weights = &_permuted_weights;
*c217d954SCole Faust        }
*c217d954SCole Faust
*c217d954SCole Faust        // Flip weights
*c217d954SCole Faust        _flipped_weights.allocator()->allocate();
*c217d954SCole Faust        _flip_weights_func.run();
*c217d954SCole Faust        cur_weights->mark_as_unused();
*c217d954SCole Faust
*c217d954SCole Faust        // Pad weights
*c217d954SCole Faust        _padded_weights.allocator()->allocate();
*c217d954SCole Faust        _pad_weights_func.run();
*c217d954SCole Faust        _flipped_weights.mark_as_unused();
*c217d954SCole Faust        _flipped_weights.allocator()->free();
*c217d954SCole Faust
*c217d954SCole Faust        // Transform weights to frequency domain
*c217d954SCole Faust        _transformed_weights.allocator()->allocate();
*c217d954SCole Faust        _transform_weights_func->run();
*c217d954SCole Faust        _transform_weights_func.reset();
*c217d954SCole Faust
*c217d954SCole Faust        _padded_weights.mark_as_unused();
*c217d954SCole Faust        _padded_weights.allocator()->free();
*c217d954SCole Faust
*c217d954SCole Faust        _is_prepared = true;
*c217d954SCole Faust    }
*c217d954SCole Faust}
*c217d954SCole Faust} // namespace arm_compute