xref: /aosp_15_r20/external/ComputeLibrary/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1 /*
2  * Copyright (c) 2021-2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_CPU_POOL2D_ASSEMBLY_WRAPPER_KERNEL_H
25 #define ARM_COMPUTE_CPU_POOL2D_ASSEMBLY_WRAPPER_KERNEL_H
26 
#include "arm_compute/core/Types.h"
#include "src/core/NEON/kernels/assembly/pooling.hpp"
#include "src/core/common/Macros.h"
#include "src/cpu/ICpuKernel.h"
#include "src/cpu/kernels/CpuKernelSelectionTypes.h"

#include "pool_common.hpp"

#include <cstddef>
#include <memory>
34 
35 namespace arm_compute
36 {
37 namespace cpu
38 {
39 namespace kernels
40 {
41 /** This class is a wrapper for the assembly kernels.
42   *
43   * Some kernels were written in assembly and highly optimised for specific
44   * CPUs like A53 or A55. The arm compute library creates an instance of
45   * CpuPool2dAssemblyWrapperKernel and other auxiliary data structures to
46   * execute a single assembly kernel in the context of an NEFunction.
47   *
48   */
49 class CpuPool2dAssemblyWrapperKernel final : public ICpuKernel<CpuPool2dAssemblyWrapperKernel>
50 {
51 public:
52     /** Constructor
53      */
54     CpuPool2dAssemblyWrapperKernel() = default;
55     ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuPool2dAssemblyWrapperKernel);
56 
name()57     const char *name() const override
58     {
59         return "CpuPool2dAssemblyWrapperKernel";
60     }
61 
62     /** Initialise the kernel's src and dst.
63      *
64      * @param[in]  src      Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
65      * @param[out] dst      Destination tensor info to store the result of pooling. Data types supported: same as @p src.
66      * @param[in]  info     Pooling meta-data.
67      * @param[in]  cpu_info CPU information needed to select the most appropriate kernel.
68      */
69     void configure(const ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, const CPUInfo &cpu_info);
70 
71     /** Static function to check if given info will lead to a valid configuration
72      *
73      * Similar to CpuPool2dAssemblyWrapperKernel::configure()
74      *
75      * @return a status
76      */
77     static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &info);
78 
79     // Inherited methods overridden:
80     void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
81 
82     /** Get size of the workspace needed by the assembly kernel.
83      *
84      * @param[in] num_threads Maximum number of threads that are going to be spawned.
85      *
86      * @return size of workspace
87      */
88     size_t get_working_size(unsigned int num_threads) const;
89 
90     /** Was the asm kernel successfully configured?
91      *
92      * @return True if the asm kernel is configured and ready to run
93      */
94     bool is_configured() const;
95 
96 private:
97     /** Helper function to create the assembly kernel.
98      *
99      * @param[in] src  Source tensor info.
100      * @param[in] dst  Destination tensor info.
101      * @param[in] info Pooling layer meta-data.
102      */
103     template <typename Typesrc, typename Typedst>
104     void create_arm_pooling(const ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, const CPUInfo &cpu_info);
105 
106     /** Helper function to create the assembly kernel with requantization support
107      *
108      * @param[in] src  Source tensor info.
109      * @param[in] dst  Destination tensor info.
110      * @param[in] info Pooling layer meta-data.
111      */
112     template <typename Typesrc, typename Typedst>
113     void create_arm_pooling_requant(const ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, const CPUInfo &cpu_info);
114 
115     std::unique_ptr<arm_conv::pooling::IPoolingCommon> _kernel_asm{ nullptr };
116 
117     /** Return minimum workload size of the relevant kernel
118      *
119      * @param[in] platform     The CPU platform used to create the context.
120      * @param[in] thread_count Number of threads in the execution.
121      *
122      * @return[out] small_network_mws          Minimum workload size for requsted configuration.
123      */
124     size_t get_mws(const CPUInfo &platform, size_t thread_count) const override;
125 };
126 } // namespace kernels
127 } // namespace cpu
128 } // namespace arm_compute
129 #endif /* ARM_COMPUTE_CPU_POOL2D_ASSEMBLY_WRAPPER_KERNEL_H */
130