xref: /aosp_15_r20/external/ComputeLibrary/src/core/NEON/kernels/assembly/pooling.hpp (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1 /*
2  * Copyright (c) 2021-2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25 #pragma once
26 
27 #include "arm_gemm_local.hpp"
28 #include "pool_common.hpp"
29 
30 namespace arm_conv
31 {
32 namespace pooling
33 {
34 struct PoolingConfig
35 {
36     PoolingMethod method = PoolingMethod::DEFAULT;
37     std::string   filter = "";
38 
PoolingConfigarm_conv::pooling::PoolingConfig39     PoolingConfig(PoolingMethod method)
40         : method(method) {};
PoolingConfigarm_conv::pooling::PoolingConfig41     PoolingConfig() {};
42 };
43 
44 struct PoolingArgs
45 {
46     const CPUInfo *cpu_info;
47 
48     PoolingType   pool_type;
49     PoolingWindow pool_window;
50     PoolingStride pool_stride;
51     bool          exclude_padding;
52 
53     unsigned int n_batches, input_rows, input_cols, n_channels;
54     unsigned int output_rows, output_cols;
55 
56     PaddingValues padding;
57 
58     const PoolingConfig *config;
59 
PoolingArgsarm_conv::pooling::PoolingArgs60     PoolingArgs(
61         const CPUInfo       *cpu_info,
62         PoolingType          pool_type,
63         const PoolingWindow &window,
64         const PoolingStride &stride,
65         bool                 exclude_padding,
66         unsigned int         n_batches,
67         unsigned int         input_rows,
68         unsigned int         input_cols,
69         unsigned int         n_channels,
70         unsigned int         output_rows,
71         unsigned int         output_cols,
72         const PaddingValues &padding,
73         const PoolingConfig *cfg)
74         : cpu_info(cpu_info), pool_type(pool_type), pool_window(window), pool_stride(stride), exclude_padding(exclude_padding), n_batches(n_batches), input_rows(input_rows), input_cols(input_cols),
75           n_channels(n_channels), output_rows(output_rows), output_cols(output_cols), padding(padding), config(cfg)
76     {
77         // If either of the pooling window dimensions are set to zero, meaning
78         // "pool everything", then replace with the corresponding input dimension.
79         if(pool_window.rows == 0)
80         {
81             pool_window.rows = input_rows;
82         }
83         if(pool_window.cols == 0)
84         {
85             pool_window.cols = input_cols;
86         }
87     }
88 };
89 
// Empty placeholder type; it is the default OutputStage of pooling().
struct Nothing {};
93 
// Output-stage parameter block holding 32-bit offsets, shifts and a multiplier.
// NOTE(review): presumably consumed by quantized pooling kernels - confirm at
// the use sites; this header only stores the values.
struct Requantize32
{
    int32_t input_offset  = 0;
    int32_t output_offset = 0;

    int32_t per_layer_left_shift  = 0;
    int32_t per_layer_right_shift = 0;
    int32_t per_layer_mul         = 0;

    // Copy each argument into the member of the same name.
    Requantize32(
        int32_t input_offset,
        int32_t output_offset,
        int32_t per_layer_left_shift,
        int32_t per_layer_right_shift,
        int32_t per_layer_mul)
        : input_offset(input_offset),
          output_offset(output_offset),
          per_layer_left_shift(per_layer_left_shift),
          per_layer_right_shift(per_layer_right_shift),
          per_layer_mul(per_layer_mul)
    {
    }
};
110 
111 template <typename TInput, typename TOutput>
112 class PoolingCommon : public IPoolingCommon
113 {
114 protected:
115     const PoolingArgs m_args;
116 
117 public:
PoolingCommon(const PoolingArgs & args)118     PoolingCommon(const PoolingArgs &args)
119         : m_args(args)
120     {
121     }
122     PoolingCommon(PoolingCommon &) = delete;
123     PoolingCommon &operator=(PoolingCommon &) = delete;
124 
125     size_t get_working_size(unsigned int, unsigned int) const override = 0;
get_working_size(unsigned int n_threads) const126     size_t get_working_size(unsigned int n_threads) const override
127     {
128         return this->get_working_size(n_threads, m_args.n_channels);
129     }
130 
131     // Execute pooling over the specified area of memory.
execute(const void * const input,void * const output,void * working_space,unsigned int thread_id,unsigned int num_threads) const132     void execute(
133         const void *const input,
134         void *const       output,
135         void             *working_space,
136         unsigned int      thread_id,
137         unsigned int      num_threads) const override
138     {
139         this->execute(
140             input,
141             m_args.n_channels,
142             m_args.n_channels * m_args.input_cols,
143             m_args.n_channels * m_args.input_cols * m_args.input_rows,
144             output,
145             m_args.n_channels,
146             m_args.n_channels * m_args.output_cols,
147             m_args.n_channels * m_args.output_cols * m_args.output_rows,
148             working_space,
149             thread_id, num_threads);
150     }
151 
execute(const void * const input,size_t ld_input_col,size_t ld_input_row,size_t ld_input_batch,void * const output,size_t ld_output_col,size_t ld_output_row,size_t ld_output_batch,void * working_space,unsigned int thread_id,unsigned int num_threads) const152     void execute(
153         const void *const input,
154         size_t            ld_input_col,
155         size_t            ld_input_row,
156         size_t            ld_input_batch,
157         void *const       output,
158         size_t            ld_output_col,
159         size_t            ld_output_row,
160         size_t            ld_output_batch,
161         void             *working_space,
162         unsigned int      thread_id,
163         unsigned int      num_threads) const override
164     {
165         this->execute(
166             m_args.n_batches, m_args.input_rows, m_args.input_cols, m_args.n_channels,
167             input, ld_input_col, ld_input_row, ld_input_batch,
168             m_args.padding, m_args.output_rows, m_args.output_cols,
169             output, ld_output_col, ld_output_row, ld_output_batch,
170             working_space, thread_id, num_threads);
171     }
172 
execute(unsigned int batches,unsigned int height,unsigned int width,unsigned int channels,const void * const input,size_t ld_input_col,size_t ld_input_row,size_t ld_input_batch,const PaddingValues & padding,unsigned int output_height,unsigned int output_width,void * const output,size_t ld_output_col,size_t ld_output_row,size_t ld_output_batch,void * working_space,unsigned int thread_id,unsigned int num_threads) const173     void execute(
174         unsigned int         batches,
175         unsigned int         height,
176         unsigned int         width,
177         unsigned int         channels,
178         const void *const    input,
179         size_t               ld_input_col,
180         size_t               ld_input_row,
181         size_t               ld_input_batch,
182         const PaddingValues &padding,
183         unsigned int         output_height,
184         unsigned int         output_width,
185         void *const          output,
186         size_t               ld_output_col,
187         size_t               ld_output_row,
188         size_t               ld_output_batch,
189         void                *working_space,
190         unsigned int         thread_id,
191         unsigned int         num_threads) const override
192     {
193         this->execute_internal(
194             batches, height, width, channels, padding,
195             input, ld_input_col, ld_input_row, ld_input_batch,
196             output_height, output_width,
197             output, ld_output_col, ld_output_row, ld_output_batch,
198             working_space, thread_id, num_threads);
199     }
200 
201 protected:
202     virtual void execute_internal(
203         unsigned int batches,
204         unsigned int height,
205         unsigned int width,
206         unsigned int channels,
207         const PaddingValues &,
208         const void *const input,
209         size_t            ld_input_col,
210         size_t            ld_input_row,
211         size_t            ld_input_batch,
212         unsigned int      output_height,
213         unsigned int      output_width,
214         void *const       output,
215         size_t            ld_output_col,
216         size_t            ld_output_row,
217         size_t            ld_output_batch,
218         void             *working_space,
219         unsigned int      thread_id,
220         unsigned int      num_threads) const = 0;
221 };
222 
223 template <typename TInput, typename TOutput>
224 using UniquePoolingCommon = std::unique_ptr<PoolingCommon<TInput, TOutput>>;
225 
226 // Get a pooling engine
227 template <typename TInput, typename TOutput = TInput, class OutputStage = Nothing>
228 UniquePoolingCommon<TInput, TOutput> pooling(const PoolingArgs &, const OutputStage & = {});
229 
230 } // namespace pooling
231 } // namespace arm_conv
232