xref: /aosp_15_r20/external/ComputeLibrary/src/cpu/kernels/elementwise_unary/generic/neon/impl.cpp (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1 /*
2  * Copyright (c) 2018-2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "src/cpu/kernels/elementwise_unary/generic/neon/impl.h"
25 
26 namespace arm_compute
27 {
28 namespace cpu
29 {
30 template <typename ScalarType>
elementwise_op_scalar_imp(ElementWiseUnary op,const ScalarType & a)31 inline ScalarType elementwise_op_scalar_imp(ElementWiseUnary op, const ScalarType &a)
32 {
33     switch(op)
34     {
35         case ElementWiseUnary::RSQRT:
36             return 1 / sqrt(a);
37         case ElementWiseUnary::EXP:
38             return std::exp(a);
39         case ElementWiseUnary::NEG:
40             return -a;
41         case ElementWiseUnary::LOG:
42             return std::log(a);
43         case ElementWiseUnary::ABS:
44             return std::abs(a);
45         case ElementWiseUnary::ROUND:
46             return support::cpp11::nearbyint(a);
47         case ElementWiseUnary::SIN:
48             return std::sin(a);
49         default:
50             ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
51     }
52 }
53 
54 template <typename ScalarType, typename VectorType>
elementwise_op_imp(ElementWiseUnary op,const VectorType & a)55 inline VectorType elementwise_op_imp(ElementWiseUnary op, const VectorType &a)
56 {
57     switch(op)
58     {
59         case ElementWiseUnary::RSQRT:
60             return wrapper::vinvsqrt(a);
61         case ElementWiseUnary::EXP:
62             return wrapper::vexpq(a);
63         case ElementWiseUnary::NEG:
64             return wrapper::vneg(a);
65         case ElementWiseUnary::LOG:
66             return wrapper::vlog(a);
67         case ElementWiseUnary::ABS:
68             return wrapper::vabs(a);
69         case ElementWiseUnary::ROUND:
70             return wrapper::vround(a);
71         case ElementWiseUnary::SIN:
72             return wrapper::vsin(a);
73         default:
74             ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
75     }
76 }
77 
78 template <typename ScalarType>
elementwise_op(const ITensor * in,ITensor * out,const Window & window,ElementWiseUnary op)79 void elementwise_op(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op)
80 {
81     const int  window_step_x  = 16 / sizeof(ScalarType);
82     const auto window_start_x = static_cast<int>(window.x().start());
83     const auto window_end_x   = static_cast<int>(window.x().end());
84 
85     Window win = window;
86     win.set(Window::DimX, Window::Dimension(0, 1, 1));
87 
88     Iterator input(in, win);
89     Iterator output(out, win);
90 
91     execute_window_loop(win, [&](const Coordinates &)
92     {
93         auto       output_ptr = reinterpret_cast<ScalarType *>(output.ptr());
94         const auto input_ptr  = reinterpret_cast<const ScalarType *>(input.ptr());
95 
96         int x = window_start_x;
97         for(; x <= window_end_x - window_step_x; x += window_step_x)
98         {
99             wrapper::vstore(output_ptr + x, elementwise_op_imp<ScalarType>(op, wrapper::vloadq(input_ptr + x)));
100         }
101         for(; x < window_end_x; ++x)
102         {
103             *(output_ptr + x) = elementwise_op_scalar_imp(op, *(input_ptr + x));
104         }
105     },
106     input, output);
107 }
108 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
109 template void elementwise_op<__fp16>(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op);
110 #endif //defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
111 template void elementwise_op<float>(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op);
112 template void elementwise_op<int32_t>(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op);
113 
114 } // namespace cpu
115 } // namespace arm_compute
116