xref: /aosp_15_r20/external/ComputeLibrary/src/core/CL/kernels/CLFillBorderKernel.cpp (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1 /*
2  * Copyright (c) 2016-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "src/core/CL/kernels/CLFillBorderKernel.h"
25 
26 #include "arm_compute/core/CL/CLHelpers.h"
27 #include "arm_compute/core/CL/CLKernelLibrary.h"
28 #include "arm_compute/core/CL/ICLTensor.h"
29 #include "arm_compute/core/CL/OpenCL.h"
30 #include "arm_compute/core/TensorInfo.h"
31 #include "arm_compute/core/Utils.h"
32 #include "arm_compute/core/Validate.h"
33 #include "src/core/helpers/WindowHelpers.h"
34 #include "support/Cast.h"
35 #include "support/StringSupport.h"
36 
37 namespace arm_compute
38 {
CLFillBorderKernel()39 CLFillBorderKernel::CLFillBorderKernel()
40     : ICLKernel(), _tensor(nullptr)
41 {
42     _type = CLKernelType::ELEMENTWISE;
43 }
44 
is_parallelisable() const45 bool CLFillBorderKernel::is_parallelisable() const
46 {
47     return false;
48 }
49 
50 template <class T>
set_constant_border(unsigned int idx,const PixelValue & constant_border_value)51 void CLFillBorderKernel::set_constant_border(unsigned int idx, const PixelValue &constant_border_value)
52 {
53     T value;
54     constant_border_value.get(value);
55     ICLKernel::add_argument<T>(idx, static_cast<T>(value));
56 }
57 
configure(ICLTensor * tensor,BorderSize border_size,BorderMode border_mode,const PixelValue & constant_border_value)58 void CLFillBorderKernel::configure(ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value)
59 {
60     configure(CLKernelLibrary::get().get_compile_context(), tensor, border_size, border_mode, constant_border_value);
61 }
62 
configure(const CLCompileContext & compile_context,ICLTensor * tensor,BorderSize border_size,BorderMode border_mode,const PixelValue & constant_border_value)63 void CLFillBorderKernel::configure(const CLCompileContext &compile_context, ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value)
64 {
65     _tensor = tensor;
66     configure(compile_context, tensor->info(), border_size, border_mode, constant_border_value);
67 }
68 
/** Build and configure the border-fill OpenCL kernel from tensor metadata.
 *
 * Selects the kernel variant from @p border_mode, bakes the border sizes into
 * the CL build options, and appends the static kernel arguments (valid region
 * and, for CONSTANT mode, the typed border value).  If the effective border is
 * empty or the mode is UNDEFINED, no kernel is created at all — run()/run_op()
 * detect the empty _kernel and return immediately.
 *
 * @param[in] compile_context       Context used to build the CL program.
 * @param[in] tensor                Tensor info of the tensor whose borders are filled. Must be single-channel.
 * @param[in] border_size           Requested border size; clamped to the tensor's actual padding.
 * @param[in] border_mode           Border mode (CONSTANT / REPLICATE / UNDEFINED).
 * @param[in] constant_border_value Fill value, only consumed when @p border_mode is CONSTANT.
 */
void CLFillBorderKernel::configure(const CLCompileContext &compile_context, ITensorInfo *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value)
{
    ARM_COMPUTE_ERROR_ON(tensor == nullptr);
    ARM_COMPUTE_ERROR_ON(tensor->num_channels() != 1);
    auto padding_info = get_padding_info({ tensor });

    // Never fill beyond the memory that actually exists: clamp to the tensor's padding.
    border_size.limit(tensor->padding());

    // If there is no border: early exit (leaves _kernel empty, which run()/run_op() check)
    if(border_size.empty() || border_mode == BorderMode::UNDEFINED)
    {
        return;
    }

    // Select appropriate kernel, e.g. "fill_image_borders_constant" / "fill_image_borders_replicate"
    std::string kernel_name = "fill_image_borders_" + lower_string(string_from_border_mode(border_mode));

    const DataType dt = tensor->data_type();

    // Define build options: data type and all four border extents are compile-time
    // constants of the CL program.
    CLBuildOptions build_opts;
    build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(dt));
    build_opts.add_option("-DBORDER_SIZE_TOP=" + support::cpp11::to_string(border_size.top));
    build_opts.add_option("-DBORDER_SIZE_BOTTOM=" + support::cpp11::to_string(border_size.bottom));
    build_opts.add_option("-DBORDER_SIZE_LEFT=" + support::cpp11::to_string(border_size.left));
    build_opts.add_option("-DBORDER_SIZE_RIGHT=" + support::cpp11::to_string(border_size.right));

    // Create kernel
    _kernel = create_kernel(compile_context, kernel_name, build_opts.options());

    // Create static kernel arguments
    const unsigned int valid_width  = tensor->valid_region().shape[0];
    const unsigned int valid_height = tensor->valid_region().shape[1];
    const cl_int2      valid_region_coords =
    {
        {
            static_cast<cl_int>(tensor->valid_region().anchor[0]),
            static_cast<cl_int>(tensor->valid_region().anchor[1]),
        }
    };
    const unsigned int total_valid_width = border_size.left + valid_width + border_size.right;

    // Set static kernel arguments, starting after the 3D tensor arguments that
    // run()/run_op() set per slice.
    unsigned int idx = num_arguments_per_3D_tensor(); //Skip the tensor parameters
    ICLKernel::add_argument<cl_uint>(idx, valid_width);
    ICLKernel::add_argument<cl_uint>(idx, valid_height);
    ICLKernel::add_argument<cl_int2>(idx, valid_region_coords);
    if(BorderMode::CONSTANT == border_mode)
    {
        // The border value is passed as a kernel argument of the tensor's exact
        // element type; the static_asserts guard host/device size agreement.
        switch(dt)
        {
            case DataType::U8:
            case DataType::QASYMM8:
                set_constant_border<uint8_t>(idx, constant_border_value);
                break;
            case DataType::S8:
            case DataType::QASYMM8_SIGNED:
                set_constant_border<int8_t>(idx, constant_border_value);
                break;
            case DataType::U16:
                set_constant_border<uint16_t>(idx, constant_border_value);
                break;
            case DataType::S16:
                set_constant_border<int16_t>(idx, constant_border_value);
                break;
            case DataType::U32:
                set_constant_border<uint32_t>(idx, constant_border_value);
                break;
            case DataType::S32:
                set_constant_border<int32_t>(idx, constant_border_value);
                break;
            case DataType::F32:
                static_assert(sizeof(float) == 4, "Float must be 32 bit");
                set_constant_border<float>(idx, constant_border_value);
                break;
            case DataType::F16:
                static_assert(sizeof(cl_half) == sizeof(half), "Half must be same size as cl_half");
                static_assert(sizeof(cl_half) == 2, "Half must be 16 bit");
                set_constant_border<half>(idx, constant_border_value);
                break;
            default:
                ARM_COMPUTE_ERROR("Not handled");
        }
    }

    // Configure kernel window.
    // NOTE(review): DimX spans total_valid_width + valid_height work-items — presumably
    // the CL kernel maps this 1D range onto the horizontal plus vertical border strips;
    // confirm against the fill_image_borders_* kernel source.
    Window win;
    win.set(Window::DimX, Window::Dimension(0, total_valid_width + valid_height));
    win.set(Window::DimY, Window::Dimension(0, 1, 1));
    win.use_tensor_dimensions(tensor->tensor_shape(), Window::DimZ);
    ICLKernel::configure_internal(win);

    // Set config_id for enabling LWS tuning
    _config_id = kernel_name;
    _config_id += "_";
    _config_id += lower_string(string_from_data_type(dt));
    _config_id += "_";
    _config_id += support::cpp11::to_string(tensor->dimension(0));
    _config_id += "_";
    _config_id += support::cpp11::to_string(tensor->dimension(1));
    _config_id += "_";
    _config_id += lower_string(string_from_border_mode(border_mode));
    // Configuration must not have altered the tensor's padding requirements.
    ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
173 
run_op(ITensorPack & tensors,const Window & window,cl::CommandQueue & queue)174 void CLFillBorderKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
175 {
176     // Border mode undefined or border width == 0
177     if(_kernel() == nullptr)
178     {
179         return;
180     }
181 
182     const auto tensor = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC));
183 
184     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
185     ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window);
186 
187     Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
188     Window slice     = collapsed.first_slice_window_3D();
189 
190     do
191     {
192         unsigned int idx = 0;
193         add_3D_tensor_argument(idx, tensor, slice);
194         enqueue(queue, *this, slice, lws_hint());
195     }
196     while(collapsed.slide_window_slice_3D(slice));
197 }
198 
run(const Window & window,cl::CommandQueue & queue)199 void CLFillBorderKernel::run(const Window &window, cl::CommandQueue &queue)
200 {
201     // Border mode undefined or border width == 0
202     if(_kernel() == nullptr)
203     {
204         return;
205     }
206 
207     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
208     ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window);
209 
210     Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
211     Window slice     = collapsed.first_slice_window_3D();
212 
213     do
214     {
215         unsigned int idx = 0;
216         add_3D_tensor_argument(idx, _tensor, slice);
217         enqueue(queue, *this, slice, lws_hint());
218     }
219     while(collapsed.slide_window_slice_3D(slice));
220 }
221 } // namespace arm_compute
222