xref: /aosp_15_r20/external/ComputeLibrary/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1 /*
2  * Copyright (c) 2017-2021, 2023 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_NEDECONVOLUTIONLAYER_H
25 #define ARM_COMPUTE_NEDECONVOLUTIONLAYER_H
26 
27 #include "arm_compute/runtime/CPP/functions/CPPUpsample.h"
28 #include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
29 #include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
30 #include "arm_compute/runtime/NEON/functions/NEReverse.h"
31 
32 #include "arm_compute/core/Types.h"
33 #include "arm_compute/runtime/IFunction.h"
34 #include "arm_compute/runtime/IMemoryManager.h"
35 #include "arm_compute/runtime/MemoryGroup.h"
36 #include "arm_compute/runtime/Tensor.h"
37 
38 #include <memory>
39 
40 namespace arm_compute
41 {
42 /** Function to run the deconvolution layer.
43  *
44  * Deconvolution Layer is the backward pass of Convolution Layer. First we transform the input depending on the stride and pad info and then perform a 1x1
45  * convolution pass. Input stride defines how many zeroes we should put between each element of the input, pad is the amount of padding and, finally, a is a
46  * user-specified value where a < stride - 1 that increases the padding top and right of the input image.
47  *
48  *  The relation between input to output is as follows:
49  *  \f[
50  *       width\_output = (width\_input - 1) \cdot stride\_x - 2 \cdot padding\_x + kernel\_x
51  *  \f]
52  *  \f[
53  *       height\_output = (height\_input - 1) \cdot stride\_y - 2 \cdot padding\_y + kernel\_y
54  *  \f]
55  *
56  *  where
57  *      width is the size of the first input dimension.
58  *      height is the size of the second input dimension.
59  *      width_output is the size of the first output dimension.
60  *      height_output is the size of the second output dimension.
61  *      kernel_x and kernel_y are the convolution sizes in x and y.
62  *      stride_x and stride_y are the input strides of the first and second dimensions.
63  *
64  * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution. Therefore, it will be necessary to use the weights in the
65  * reverse order to perform an actual convolution. This is achieved by using @ref NEReverse.
66  *
67  * This function calls the following kernels/functions:
68  *
69  * -# @ref CPPUpsample
70  * -# @ref NEConvolutionLayer
71  * -# @ref NEReverse
72  *
73  */
74 class NEDeconvolutionLayer : public IFunction
75 {
76 public:
77     /** Constructor taking an optional memory manager (the argument defaults to nullptr when omitted). */
78     NEDeconvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
79     /** Prevent instances of this class from being copied (As this class contains pointers) */
80     NEDeconvolutionLayer(const NEDeconvolutionLayer &) = delete;
81     /** Default move constructor */
82     NEDeconvolutionLayer(NEDeconvolutionLayer &&) = default;
83     /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */
84     NEDeconvolutionLayer &operator=(const NEDeconvolutionLayer &) = delete;
85     /** Default move assignment operator */
86     NEDeconvolutionLayer &operator=(NEDeconvolutionLayer &&) = default;
87     /** Default destructor */
88     ~NEDeconvolutionLayer() = default;
89 
90     /** Set the input, weights, biases and output tensors.
91      *
92      * Valid data layouts:
93      * - NHWC
94      * - NCHW
95      *
96      * Valid data type configurations:
97      * |src0           |src1               |src2   |dst            |
98      * |:--------------|:------------------|:------|:--------------|
99      * |F16            |F16                |F16    |F16            |
100      * |F32            |F32                |F32    |F32            |
101      * |QASYMM8        |QASYMM8            |S32    |QASYMM8        |
102      * |QASYMM8        |QSYMM8_PER_CHANNEL |S32    |QASYMM8        |
103      * |QASYMM8_SIGNED |QASYMM8_SIGNED     |S32    |QASYMM8_SIGNED |
104      * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32    |QASYMM8_SIGNED |
105      *
106      * @param[in,out] input            Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs.
107      *                                 Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
108      * @param[in]     weights          The 4d weights with dimensions [width, height, IFM, OFM].
109      *                                 Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED.
110      * @param[in]     bias             Optional, ignored if NULL. The biases have one dimension.
111      *                                 Data types supported: S32 for QASYMM8/QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
112      * @param[out]    output           Output tensor. The output has the same number of dimensions as the @p input.
113      * @param[in]     info             Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo.
114      * @param[in]     enable_fast_math (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest implementation
115      *                                            available, which may also introduce a drop of accuracy. Default is false
116      *
117      */
118     void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &info, bool enable_fast_math = false);
119     /** Static function to check if given info will lead to a valid configuration of @ref NEDeconvolutionLayer
120      *
121      * @param[in] input            Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs.
122      *                             Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
123      * @param[in] weights          The 4d weights info with dimensions [width, height, IFM, OFM].
124      *                             Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED.
125      * @param[in] bias             (Optional) The biases have one dimension. Data types supported: S32 for QASYMM8/QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
126      * @param[in] output           Output tensor info. The output has the same number of dimensions as the @p input.
127      * @param[in] info             Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo.
128      * @param[in] enable_fast_math (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest implementation
129      *                                        available, which may also introduce a drop of accuracy. Default is false
130      *
131      * @return a status
132      */
133     static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &info, bool enable_fast_math = false);
134 
135     // Inherited methods overridden:
136     void run() override;
137     void prepare() override;
138 
139 private:
140     MemoryGroup        _memory_group; /**< Memory group; presumably manages the intermediate tensors below - TODO confirm against the .cpp */
141     NEConvolutionLayer _conv_f; /**< Convolution function (one of the functions this layer calls) */
142     CPPUpsample        _upsample_f; /**< Upsampling function (one of the functions this layer calls) */
143     NEReverse          _flip_weights; /**< Function used to put the weights in reverse order */
144     Tensor             _scaled_output; /**< Intermediate tensor; presumably the upsampled input fed to the convolution - TODO confirm */
145     Tensor             _weights_flipped; /**< Intermediate tensor; presumably holds the reversed weights - TODO confirm */
146     Tensor             _flip_axis; /**< Tensor; presumably the axes along which the weights are reversed - TODO confirm */
147     const ITensor     *_original_weights; /**< Raw pointer, not released by the defaulted destructor; presumably the weights given to configure() - TODO confirm */
148     ITensor           *_input; /**< Raw pointer, not released by the defaulted destructor; presumably the input given to configure() - TODO confirm */
149     PadStrideInfo      _info; /**< Pad/stride information; presumably a copy of the one given to configure() - TODO confirm */
150     bool               _is_prepared; /**< Presumably true once prepare() has completed - TODO confirm */
151     bool               _do_upsampling; /**< Presumably selects whether the upsampling step is executed - TODO confirm */
152 };
153 } // arm_compute
154 #endif /* ARM_COMPUTE_NEDECONVOLUTIONLAYER_H */
155