xref: /aosp_15_r20/external/ComputeLibrary/src/core/NEON/kernels/arm_conv/pooling/pooling_fp16.cpp (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1 /*
2  * Copyright (c) 2021-2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25 // This can only be built if the target/compiler supports FP16 arguments.
26 #ifdef __ARM_FP16_ARGS
27 
28 #include "arm_gemm_local.hpp"
29 
30 #include "pooling_implementation.hpp"
31 #include "pooling_depthfirst.hpp"
32 #include "pooling_depthfirst_generic.hpp"
33 
34 #include "kernels/cpp_nhwc_1x1_stride_any_depthfirst.hpp"
35 #if defined(__aarch64__)
36 #if defined(ARM_COMPUTE_ENABLE_SME)
37 #include "kernels/sme_fp16_nhwc_max_2x2_s1_output2x2_depthfirst.hpp"
38 #include "kernels/sme_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst.hpp"
39 #include "kernels/sme_fp16_nhwc_avg_generic_depthfirst.hpp"
40 #include "kernels/sme_fp16_nhwc_max_generic_depthfirst.hpp"
41 #endif  // defined(ARM_COMPUTE_ENABLE_SME)
42 #if defined(ARM_COMPUTE_ENABLE_SVE)
43 #include "kernels/sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst.hpp"
44 #include "kernels/sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst.hpp"
45 #include "kernels/sve_fp16_nhwc_avg_generic_depthfirst.hpp"
46 #include "kernels/sve_fp16_nhwc_max_generic_depthfirst.hpp"
47 #endif  // defined(ARM_COMPUTE_ENABLE_SVE)
48 #include "kernels/a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst.hpp"
49 #include "kernels/a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst.hpp"
50 #include "kernels/a64_fp16_nhwc_avg_generic_depthfirst.hpp"
51 #include "kernels/a64_fp16_nhwc_max_generic_depthfirst.hpp"
52 #endif  // defined(__aarch64__)
53 
54 namespace arm_conv {
55 namespace pooling {
56 
57 static const PoolingImplementation<__fp16, __fp16> pooling_fp16_methods[] = {
58   {
59     PoolingMethod::DEPTHFIRST,
60     "cpp_fp16_nhwc_1x1_stride_any_depthfirst",
__anon308608e90102() 61     [] (const PoolingArgs &args, const Nothing &) -> bool {
62       return args.pool_window.rows == 1 && args.pool_window.cols == 1;
63     },
64     nullptr,
__anon308608e90202() 65     [] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<__fp16, __fp16> * {
66       auto strat = new cpp_nhwc_1x1_stride_any_depthfirst<__fp16>(args.cpu_info);
67       return new PoolingDepthfirstGeneric<__fp16>(strat, args);
68     },
69   },
70 #if defined(__aarch64__)
71 #if defined(ARM_COMPUTE_ENABLE_SME)
72   {
73     PoolingMethod::DEPTHFIRST,
74     "sme_fp16_nhwc_max_2x2_s1_output2x2_depthfirst",
__anon308608e90302() 75     [] (const PoolingArgs &args, const Nothing &os) -> bool {
76       return args.cpu_info->has_sme() &&
77              is_supported<sme_fp16_nhwc_max_2x2_s1_output2x2_depthfirst>(args, os);
78     },
79     nullptr,
__anon308608e90402() 80     [] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<__fp16, __fp16> * {
81       auto strat = new sme_fp16_nhwc_max_2x2_s1_output2x2_depthfirst(args.cpu_info);
82       return new PoolingDepthfirst<__fp16>(strat, args);
83     },
84   },
85   {
86     PoolingMethod::DEPTHFIRST,
87     "sme_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst",
__anon308608e90502() 88     [] (const PoolingArgs &args, const Nothing &os) -> bool {
89       return args.cpu_info->has_sme() &&
90              is_supported<sme_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst>(args, os);
91     },
92     nullptr,
__anon308608e90602() 93     [] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<__fp16, __fp16> * {
94       auto strat = new sme_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst(args.cpu_info);
95       return new PoolingDepthfirst<__fp16>(strat, args);
96     },
97   },
98   {
99     PoolingMethod::DEPTHFIRST,
100     "sme_fp16_nhwc_avg_generic_depthfirst",
__anon308608e90702() 101     [] (const PoolingArgs &args, const Nothing &) -> bool {
102       return args.cpu_info->has_sme() && args.pool_type == PoolingType::AVERAGE;
103     },
104     nullptr,
__anon308608e90802() 105     [] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<__fp16, __fp16> * {
106       auto strat = new sme_fp16_nhwc_avg_generic_depthfirst(args.cpu_info);
107       return new PoolingDepthfirstGeneric<__fp16>(strat, args);
108     },
109   },
110   {
111     PoolingMethod::DEPTHFIRST,
112     "sme_fp16_nhwc_max_generic_depthfirst",
__anon308608e90902() 113     [] (const PoolingArgs &args, const Nothing &) -> bool {
114       return args.cpu_info->has_sme() && args.pool_type == PoolingType::MAX;
115     },
116     nullptr,
__anon308608e90a02() 117     [] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<__fp16, __fp16> * {
118       auto strat = new sme_fp16_nhwc_max_generic_depthfirst(args.cpu_info);
119       return new PoolingDepthfirstGeneric<__fp16>(strat, args);
120     },
121   },
122 #endif  // defined(ARM_COMPUTE_ENABLE_SME)
123 #if defined(ARM_COMPUTE_ENABLE_SVE)
124   {
125     PoolingMethod::DEPTHFIRST,
126     "sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst",
__anon308608e90b02() 127     [] (const PoolingArgs &args, const Nothing &os) -> bool {
128       return args.cpu_info->has_sve() &&
129              is_supported<sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst>(args, os);
130     },
131     nullptr,
__anon308608e90c02() 132     [] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<__fp16, __fp16> * {
133       auto strat = new sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst(args.cpu_info);
134       return new PoolingDepthfirst<__fp16>(strat, args);
135     },
136   },
137   {
138     PoolingMethod::DEPTHFIRST,
139     "sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst",
__anon308608e90d02() 140     [] (const PoolingArgs &args, const Nothing &os) -> bool {
141       return args.cpu_info->has_sve() &&
142              is_supported<sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst>(args, os);
143     },
144     nullptr,
__anon308608e90e02() 145     [] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<__fp16, __fp16> * {
146       auto strat = new sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst(args.cpu_info);
147       return new PoolingDepthfirst<__fp16>(strat, args);
148     },
149   },
150   {
151     PoolingMethod::DEPTHFIRST,
152     "sve_fp16_nhwc_avg_generic_depthfirst",
__anon308608e90f02() 153     [] (const PoolingArgs &args, const Nothing &) -> bool {
154       return args.cpu_info->has_sve() && args.pool_type == PoolingType::AVERAGE;
155     },
156     nullptr,
__anon308608e91002() 157     [] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<__fp16, __fp16> * {
158       auto strat = new sve_fp16_nhwc_avg_generic_depthfirst(args.cpu_info);
159       return new PoolingDepthfirstGeneric<__fp16>(strat, args);
160     },
161   },
162   {
163     PoolingMethod::DEPTHFIRST,
164     "sve_fp16_nhwc_max_generic_depthfirst",
__anon308608e91102() 165     [] (const PoolingArgs &args, const Nothing &) -> bool {
166       return args.cpu_info->has_sve() && args.pool_type == PoolingType::MAX;
167     },
168     nullptr,
__anon308608e91202() 169     [] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<__fp16, __fp16> * {
170       auto strat = new sve_fp16_nhwc_max_generic_depthfirst(args.cpu_info);
171       return new PoolingDepthfirstGeneric<__fp16>(strat, args);
172     },
173   },
174 #endif  // defined(ARM_COMPUTE_ENABLE_SVE)
175 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
176   {
177     PoolingMethod::DEPTHFIRST,
178     "a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst",
179     is_supported<a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst>,
180     nullptr,
__anon308608e91302() 181     [] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<__fp16, __fp16> * {
182       auto strat = new a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst(args.cpu_info);
183       return new PoolingDepthfirst<__fp16>(strat, args);
184     },
185   },
186   {
187     PoolingMethod::DEPTHFIRST,
188     "a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst",
189     is_supported<a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst>,
190     nullptr,
__anon308608e91402() 191     [] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<__fp16, __fp16> * {
192       auto strat = new a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst(args.cpu_info);
193       return new PoolingDepthfirst<__fp16>(strat, args);
194     },
195   },
196   {
197     PoolingMethod::DEPTHFIRST,
198     "a64_fp16_nhwc_avg_generic_depthfirst",
__anon308608e91502() 199     [] (const PoolingArgs &args, const Nothing &) -> bool { return args.pool_type == PoolingType::AVERAGE; },
200     nullptr,
__anon308608e91602() 201     [] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<__fp16, __fp16> * {
202       auto strat = new a64_fp16_nhwc_avg_generic_depthfirst(args.cpu_info);
203       return new PoolingDepthfirstGeneric<__fp16>(strat, args);
204     },
205   },
206   {
207     PoolingMethod::DEPTHFIRST,
208     "a64_fp16_nhwc_max_generic_depthfirst",
__anon308608e91702() 209     [] (const PoolingArgs &args, const Nothing &) -> bool { return args.pool_type == PoolingType::MAX; },
210     nullptr,
__anon308608e91802() 211     [] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<__fp16, __fp16> * {
212       auto strat = new a64_fp16_nhwc_max_generic_depthfirst(args.cpu_info);
213       return new PoolingDepthfirstGeneric<__fp16>(strat, args);
214     },
215   },
216 #endif  // defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
217 #endif  // defined(__aarch64__)
218   { PoolingMethod::DEFAULT, "", nullptr, nullptr, nullptr },  // End of list
219 };
220 
221 template <>
pooling_implementation_list()222 const PoolingImplementation<__fp16, __fp16> *pooling_implementation_list()
223 {
224   return pooling_fp16_methods;
225 }
226 
227 template UniquePoolingCommon<__fp16, __fp16> pooling(const PoolingArgs &, const Nothing &);
228 
229 }  //  namespace pooling
230 }  //  namespace arm_conv
231 
232 #endif // __ARM_FP16_ARGS
233