xref: /aosp_15_r20/external/ComputeLibrary/src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1 /*
2  * Copyright (c) 2018-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "src/core/NEON/kernels/NEPriorBoxLayerKernel.h"
25 
26 #include "arm_compute/core/Helpers.h"
27 #include "arm_compute/core/ITensor.h"
28 #include "arm_compute/core/Types.h"
29 #include "arm_compute/core/Validate.h"
30 #include "src/core/helpers/AutoConfiguration.h"
31 #include "src/core/helpers/WindowHelpers.h"
32 
33 #include <arm_neon.h>
34 
35 namespace arm_compute
36 {
37 namespace
38 {
validate_arguments(const ITensorInfo * input1,const ITensorInfo * input2,const ITensorInfo * output,const PriorBoxLayerInfo & info)39 Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const PriorBoxLayerInfo &info)
40 {
41     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output);
42     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::F32);
43     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input1, input2);
44     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2);
45 
46     // Check variances
47     const int var_size = info.variances().size();
48     if(var_size > 1)
49     {
50         ARM_COMPUTE_RETURN_ERROR_ON_MSG(var_size != 4, "Must provide 4 variance values");
51         for(int i = 0; i < var_size; ++i)
52         {
53             ARM_COMPUTE_RETURN_ERROR_ON_MSG(var_size <= 0, "Must be greater than 0");
54         }
55     }
56     ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.steps()[0] < 0.f, "Step x should be greater or equal to 0");
57     ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.steps()[1] < 0.f, "Step y should be greater or equal to 0");
58 
59     if(!info.max_sizes().empty())
60     {
61         ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.max_sizes().size() != info.min_sizes().size(), "Max and min sizes dimensions should match");
62     }
63 
64     for(unsigned int i = 0; i < info.max_sizes().size(); ++i)
65     {
66         ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.max_sizes()[i] < info.min_sizes()[i], "Max size should be greater than min size");
67     }
68 
69     if(output != nullptr && output->total_size() != 0)
70     {
71         ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) != 2);
72         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, output);
73     }
74 
75     return Status{};
76 }
77 } // namespace
78 
NEPriorBoxLayerKernel()79 NEPriorBoxLayerKernel::NEPriorBoxLayerKernel()
80     : _input1(nullptr), _input2(nullptr), _output(nullptr), _info()
81 {
82 }
83 
store_coordinates(float * out,const int offset,const float center_x,const float center_y,const float box_width,const float box_height,const int width,const int height)84 void NEPriorBoxLayerKernel::store_coordinates(float *out, const int offset, const float center_x, const float center_y, const float box_width, const float box_height, const int width,
85                                               const int height)
86 {
87     float xmin = (center_x - box_width / 2.f) / width;
88     float ymin = (center_y - box_height / 2.f) / height;
89     float xmax = (center_x + box_width / 2.f) / width;
90     float ymax = (center_y + box_height / 2.f) / height;
91 
92     float32x4_t vec_elements = { xmin, ymin, xmax, ymax };
93     if(_info.clip())
94     {
95         static const float32x4_t CONST_0 = vdupq_n_f32(0.f);
96         static const float32x4_t CONST_1 = vdupq_n_f32(1.f);
97         vec_elements                     = vmaxq_f32(vminq_f32(vec_elements, CONST_1), CONST_0);
98     }
99     vst1q_f32(out + offset, vec_elements);
100 }
101 
calculate_prior_boxes(const Window & window)102 void NEPriorBoxLayerKernel::calculate_prior_boxes(const Window &window)
103 {
104     const int num_priors = _info.aspect_ratios().size() * _info.min_sizes().size() + _info.max_sizes().size();
105 
106     const DataLayout data_layout = _input1->info()->data_layout();
107     const int        width_idx   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
108     const int        height_idx  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
109 
110     const int layer_width  = _input1->info()->dimension(width_idx);
111     const int layer_height = _input1->info()->dimension(height_idx);
112 
113     int img_width  = _info.img_size().x;
114     int img_height = _info.img_size().y;
115     if(img_width == 0 || img_height == 0)
116     {
117         img_width  = _input2->info()->dimension(width_idx);
118         img_height = _input2->info()->dimension(height_idx);
119     }
120 
121     float step_x = _info.steps()[0];
122     float step_y = _info.steps()[1];
123     if(step_x == 0.f || step_y == 0.f)
124     {
125         step_x = static_cast<float>(img_width) / layer_width;
126         step_y = static_cast<float>(img_height) / layer_height;
127     }
128 
129     Window slice = window.first_slice_window_2D();
130     slice.set(Window::DimY, Window::Dimension(0, _output->info()->dimension(1), 2));
131 
132     Iterator output(_output, slice);
133     execute_window_loop(slice, [&](const Coordinates & id)
134     {
135         float center_x = 0;
136         float center_y = 0;
137         int   idx      = id.x() / (4 * num_priors);
138         center_x       = (static_cast<float>(idx % layer_width) + _info.offset()) * step_x;
139         center_y       = (static_cast<float>(idx / layer_width) + _info.offset()) * step_y;
140 
141         float box_width;
142         float box_height;
143         int   offset = 0;
144 
145         auto out = reinterpret_cast<float *>(output.ptr());
146         for(unsigned int i = 0; i < _info.min_sizes().size(); ++i)
147         {
148             const float min_size = _info.min_sizes().at(i);
149             box_width            = min_size;
150             box_height           = min_size;
151             store_coordinates(out, offset, center_x, center_y, box_width, box_height, img_width, img_height);
152             offset += 4;
153 
154             if(!_info.max_sizes().empty())
155             {
156                 const float max_size = _info.max_sizes().at(i);
157                 box_width            = std::sqrt(min_size * max_size);
158                 box_height           = box_width;
159 
160                 store_coordinates(out, offset, center_x, center_y, box_width, box_height, img_width, img_height);
161                 offset += 4;
162             }
163 
164             // rest of priors
165             for(auto ar : _info.aspect_ratios())
166             {
167                 if(fabs(ar - 1.) < 1e-6)
168                 {
169                     continue;
170                 }
171 
172                 box_width  = min_size * sqrt(ar);
173                 box_height = min_size / sqrt(ar);
174 
175                 store_coordinates(out, offset, center_x, center_y, box_width, box_height, img_width, img_height);
176                 offset += 4;
177             }
178         }
179 
180         // set the variance
181         out = reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(id.x(), 1)));
182         float32x4_t var;
183         if(_info.variances().size() == 1)
184         {
185             var = vdupq_n_f32(_info.variances().at(0));
186         }
187         else
188         {
189             const float32x4_t vars = { _info.variances().at(0), _info.variances().at(1), _info.variances().at(2), _info.variances().at(3) };
190             var                    = vars;
191         }
192         for(int i = 0; i < num_priors; ++i)
193         {
194             vst1q_f32(out + 4 * i, var);
195         }
196     },
197     output);
198 }
199 
configure(const ITensor * input1,const ITensor * input2,ITensor * output,const PriorBoxLayerInfo & info)200 void NEPriorBoxLayerKernel::configure(const ITensor *input1, const ITensor *input2, ITensor *output, const PriorBoxLayerInfo &info)
201 {
202     ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
203 
204     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1->info(), input2->info(), output->info(), info));
205 
206     _input1 = input1;
207     _input2 = input2;
208     _info   = info;
209     _output = output;
210 
211     // Configure kernel window
212     const int num_priors = info.aspect_ratios().size() * info.min_sizes().size() + info.max_sizes().size();
213     Window    win        = calculate_max_window(*output->info(), Steps(num_priors * 4));
214 
215     INEKernel::configure(win);
216 }
217 
validate(const ITensorInfo * input1,const ITensorInfo * input2,const ITensorInfo * output,const PriorBoxLayerInfo & info)218 Status NEPriorBoxLayerKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const PriorBoxLayerInfo &info)
219 {
220     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output);
221     ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input1, input2, output, info));
222 
223     return Status{};
224 }
run(const Window & window,const ThreadInfo & info)225 void NEPriorBoxLayerKernel::run(const Window &window, const ThreadInfo &info)
226 {
227     ARM_COMPUTE_UNUSED(info);
228     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
229     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
230 
231     // Run function
232     calculate_prior_boxes(window);
233 }
234 } // namespace arm_compute