xref: /aosp_15_r20/external/ComputeLibrary/src/core/CPP/kernels/CPPUpsampleKernel.cpp (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1 /*
2  * Copyright (c) 2017-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
#include "arm_compute/core/CPP/kernels/CPPUpsampleKernel.h"

#include "arm_compute/core/Helpers.h"
#include "src/core/helpers/WindowHelpers.h"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>
31 
32 namespace arm_compute
33 {
CPPUpsampleKernel()34 CPPUpsampleKernel::CPPUpsampleKernel()
35     : _input(nullptr), _output(nullptr), _info()
36 {
37 }
38 
is_parallelisable() const39 bool CPPUpsampleKernel::is_parallelisable() const
40 {
41     return false;
42 }
43 
configure(const ITensor * input,ITensor * output,const PadStrideInfo & info)44 void CPPUpsampleKernel::configure(const ITensor *input, ITensor *output, const PadStrideInfo &info)
45 {
46     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
47 
48     _input  = input;
49     _output = output;
50     _info   = info;
51 
52     // Configure kernel window
53     Window win = calculate_max_window(*input->info(), Steps());
54 
55     // The CPPUpsampleKernel doesn't need padding so update_window_and_padding() can be skipped
56     Coordinates coord;
57     coord.set_num_dimensions(output->info()->num_dimensions());
58     output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
59 
60     ICPPKernel::configure(win);
61 }
62 
run(const Window & window,const ThreadInfo & info)63 void CPPUpsampleKernel::run(const Window &window, const ThreadInfo &info)
64 {
65     ARM_COMPUTE_UNUSED(info);
66     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
67     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window);
68 
69     const DataLayout data_layout = _input->info()->data_layout();
70     const size_t     idx_w       = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
71     const size_t     idx_h       = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
72 
73     // Initialize _scaled_output buffer
74     const int    width_scaled  = _output->info()->dimension(idx_w);
75     const int    height_scaled = _output->info()->dimension(idx_h);
76     const int    stride_width  = _info.stride().first;
77     const int    stride_height = _info.stride().second;
78     const int    start_width   = _info.pad_left();
79     const int    start_height  = _info.pad_top();
80     const int    end_width     = width_scaled - _info.pad_right();
81     const int    end_height    = height_scaled - _info.pad_bottom();
82     const size_t element_size  = _input->info()->element_size();
83 
84     // The fill value is normally 0, but for quantized types '0' corresponds to the offset
85     switch(_output->info()->data_type())
86     {
87         case DataType::QASYMM8:
88         {
89             const uint8_t fill_value = _output->info()->quantization_info().uniform().offset;
90             std::fill_n(_output->buffer(), _output->info()->total_size(), fill_value);
91         }
92         break;
93         case DataType::QASYMM8_SIGNED:
94         {
95             const int8_t fill_value = _output->info()->quantization_info().uniform().offset;
96             std::fill_n(_output->buffer(), _output->info()->total_size(), fill_value);
97         }
98         break;
99         default:
100             std::fill_n(_output->buffer(), _output->info()->total_size(), 0);
101     }
102 
103     // Create window
104     Window window_out(window);
105     if(data_layout == DataLayout::NCHW)
106     {
107         window_out.set(Window::DimX, Window::Dimension(start_width, end_width, stride_width));
108         window_out.set(Window::DimY, Window::Dimension(start_height, end_height, stride_height));
109     }
110     else
111     {
112         window_out.set(Window::DimY, Window::Dimension(start_width, end_width, stride_width));
113         window_out.set(Window::DimZ, Window::Dimension(start_height, end_height, stride_height));
114     }
115 
116     // Create iterators
117     Iterator in(_input, window);
118     Iterator out(_output, window_out);
119 
120     execute_window_loop(window, [&](const Coordinates &)
121     {
122         memcpy(out.ptr(), in.ptr(), element_size);
123     },
124     in, out);
125 }
126 } // namespace arm_compute