/*
 * Copyright (c) 2018-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/CPP/functions/CPPBoxWithNonMaximaSuppressionLimit.h"

#include "arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h"
#include "arm_compute/runtime/Scheduler.h"

#include "src/common/utils/Log.h"

namespace arm_compute
{
namespace
{
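// Dequantize a QASYMM8 / QASYMM8_SIGNED / QASYMM16 tensor to F32, element by element,
// using the uniform quantization info of the input tensor.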
void dequantize_tensor(const ITensor *input, ITensor *output)
{
    const UniformQuantizationInfo qinfo     = input->info()->quantization_info().uniform();
    const DataType                data_type = input->info()->data_type();

    Window window;
    window.use_tensor_dimensions(input->info()->tensor_shape());
    Iterator input_it(input, window);
    Iterator output_it(output, window);

    switch(data_type)
    {
        case DataType::QASYMM8:
            execute_window_loop(window, [&](const Coordinates &)
            {
                *reinterpret_cast<float *>(output_it.ptr()) = dequantize(*reinterpret_cast<const uint8_t *>(input_it.ptr()), qinfo.scale, qinfo.offset);
            },
            input_it, output_it);
            break;
        case DataType::QASYMM8_SIGNED:
            execute_window_loop(window, [&](const Coordinates &)
            {
                *reinterpret_cast<float *>(output_it.ptr()) = dequantize_qasymm8_signed(*reinterpret_cast<const int8_t *>(input_it.ptr()), qinfo);
            },
            input_it, output_it);
            break;
        case DataType::QASYMM16:
            execute_window_loop(window, [&](const Coordinates &)
            {
                *reinterpret_cast<float *>(output_it.ptr()) = dequantize(*reinterpret_cast<const uint16_t *>(input_it.ptr()), qinfo.scale, qinfo.offset);
            },
            input_it, output_it);
            break;
        default:
            ARM_COMPUTE_ERROR("Unsupported data type");
    }
}

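// Quantize an F32 tensor to QASYMM8 / QASYMM8_SIGNED / QASYMM16, element by element,
// using the uniform quantization info of the output tensor.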
void quantize_tensor(const ITensor *input, ITensor *output)
{
    const UniformQuantizationInfo qinfo     = output->info()->quantization_info().uniform();
    const DataType                data_type = output->info()->data_type();

    Window window;
    window.use_tensor_dimensions(input->info()->tensor_shape());
    Iterator input_it(input, window);
    Iterator output_it(output, window);

    switch(data_type)
    {
        case DataType::QASYMM8:
            execute_window_loop(window, [&](const Coordinates &)
            {
                *reinterpret_cast<uint8_t *>(output_it.ptr()) = quantize_qasymm8(*reinterpret_cast<const float *>(input_it.ptr()), qinfo);
            },
            input_it, output_it);
            break;
        case DataType::QASYMM8_SIGNED:
            execute_window_loop(window, [&](const Coordinates &)
            {
                *reinterpret_cast<int8_t *>(output_it.ptr()) = quantize_qasymm8_signed(*reinterpret_cast<const float *>(input_it.ptr()), qinfo);
            },
            input_it, output_it);
            break;
        case DataType::QASYMM16:
            execute_window_loop(window, [&](const Coordinates &)
            {
                *reinterpret_cast<uint16_t *>(output_it.ptr()) = quantize_qasymm16(*reinterpret_cast<const float *>(input_it.ptr()), qinfo);
            },
            input_it, output_it);
            break;
        default:
            ARM_COMPUTE_ERROR("Unsupported data type");
    }
}
} // namespace

CPPBoxWithNonMaximaSuppressionLimit::CPPBoxWithNonMaximaSuppressionLimit(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(std::move(memory_manager)),
      _box_with_nms_limit_kernel(),
      _scores_in(),
      _boxes_in(),
      _batch_splits_in(),
      _scores_out(),
      _boxes_out(),
      _classes(),
      _batch_splits_out(),
      _keeps(),
      _scores_in_f32(),
      _boxes_in_f32(),
      _batch_splits_in_f32(),
      _scores_out_f32(),
      _boxes_out_f32(),
      _classes_f32(),
      _batch_splits_out_f32(),
      _keeps_f32(),
      _is_qasymm8(false)
{
}

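// The underlying CPP kernel operates on F32 data. For quantized inputs, F32
// intermediate tensors are set up and managed here and the kernel is configured
// on those; otherwise the kernel is configured directly on the caller's tensors.
// A minimal usage sketch (tensor creation and allocation omitted; names hypothetical):
//
//   CPPBoxWithNonMaximaSuppressionLimit nms_limit;
//   nms_limit.configure(&scores_in, &boxes_in, nullptr, &scores_out, &boxes_out, &classes,
//                       nullptr, nullptr, nullptr, BoxNMSLimitInfo());
//   nms_limit.run();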
void CPPBoxWithNonMaximaSuppressionLimit::configure(const ITensor *scores_in, const ITensor *boxes_in, const ITensor *batch_splits_in,
                                                    ITensor *scores_out, ITensor *boxes_out, ITensor *classes, ITensor *batch_splits_out,
                                                    ITensor *keeps, ITensor *keeps_size, const BoxNMSLimitInfo info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(scores_in, boxes_in, scores_out, boxes_out, classes);
    ARM_COMPUTE_LOG_PARAMS(scores_in, boxes_in, batch_splits_in, scores_out, boxes_out, classes, batch_splits_out, keeps, keeps_size, info);

    _is_qasymm8 = scores_in->info()->data_type() == DataType::QASYMM8 || scores_in->info()->data_type() == DataType::QASYMM8_SIGNED;

    _scores_in        = scores_in;
    _boxes_in         = boxes_in;
    _batch_splits_in  = batch_splits_in;
    _scores_out       = scores_out;
    _boxes_out        = boxes_out;
    _classes          = classes;
    _batch_splits_out = batch_splits_out;
    _keeps            = keeps;

    if(_is_qasymm8)
    {
        // Manage intermediate buffers
        _memory_group.manage(&_scores_in_f32);
        _memory_group.manage(&_boxes_in_f32);
        _memory_group.manage(&_scores_out_f32);
        _memory_group.manage(&_boxes_out_f32);
        _memory_group.manage(&_classes_f32);
        _scores_in_f32.allocator()->init(scores_in->info()->clone()->set_data_type(DataType::F32));
        _boxes_in_f32.allocator()->init(boxes_in->info()->clone()->set_data_type(DataType::F32));
        if(batch_splits_in != nullptr)
        {
            _memory_group.manage(&_batch_splits_in_f32);
            _batch_splits_in_f32.allocator()->init(batch_splits_in->info()->clone()->set_data_type(DataType::F32));
        }
        _scores_out_f32.allocator()->init(scores_out->info()->clone()->set_data_type(DataType::F32));
        _boxes_out_f32.allocator()->init(boxes_out->info()->clone()->set_data_type(DataType::F32));
        _classes_f32.allocator()->init(classes->info()->clone()->set_data_type(DataType::F32));
        if(batch_splits_out != nullptr)
        {
            _memory_group.manage(&_batch_splits_out_f32);
            _batch_splits_out_f32.allocator()->init(batch_splits_out->info()->clone()->set_data_type(DataType::F32));
        }
        if(keeps != nullptr)
        {
            _memory_group.manage(&_keeps_f32);
            _keeps_f32.allocator()->init(keeps->info()->clone()->set_data_type(DataType::F32));
        }

        _box_with_nms_limit_kernel.configure(&_scores_in_f32, &_boxes_in_f32, (batch_splits_in != nullptr) ? &_batch_splits_in_f32 : nullptr,
                                             &_scores_out_f32, &_boxes_out_f32, &_classes_f32,
                                             (batch_splits_out != nullptr) ? &_batch_splits_out_f32 : nullptr, (keeps != nullptr) ? &_keeps_f32 : nullptr,
                                             keeps_size, info);
    }
    else
    {
        _box_with_nms_limit_kernel.configure(scores_in, boxes_in, batch_splits_in, scores_out, boxes_out, classes, batch_splits_out, keeps, keeps_size, info);
    }

    if(_is_qasymm8)
    {
        _scores_in_f32.allocator()->allocate();
        _boxes_in_f32.allocator()->allocate();
        if(_batch_splits_in != nullptr)
        {
            _batch_splits_in_f32.allocator()->allocate();
        }
        _scores_out_f32.allocator()->allocate();
        _boxes_out_f32.allocator()->allocate();
        _classes_f32.allocator()->allocate();
        if(batch_splits_out != nullptr)
        {
            _batch_splits_out_f32.allocator()->allocate();
        }
        if(keeps != nullptr)
        {
            _keeps_f32.allocator()->allocate();
        }
    }
}

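// Static validation of the configuration. For quantized runs, boxes must be
// QASYMM16 with a fixed quantization of scale = 0.125 and offset = 0.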
Status CPPBoxWithNonMaximaSuppressionLimit::validate(const ITensorInfo *scores_in, const ITensorInfo *boxes_in, const ITensorInfo *batch_splits_in, const ITensorInfo *scores_out,
                                                     const ITensorInfo *boxes_out, const ITensorInfo *classes,
                                                     const ITensorInfo *batch_splits_out, const ITensorInfo *keeps, const ITensorInfo *keeps_size, const BoxNMSLimitInfo info)
{
    ARM_COMPUTE_UNUSED(batch_splits_in, batch_splits_out, keeps, keeps_size, info);
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(scores_in, boxes_in, scores_out, boxes_out, classes);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(scores_in, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);

    const bool is_qasymm8 = scores_in->data_type() == DataType::QASYMM8 || scores_in->data_type() == DataType::QASYMM8_SIGNED;
    if(is_qasymm8)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(boxes_in, 1, DataType::QASYMM16);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(boxes_in, boxes_out);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(boxes_in, boxes_out);
        const UniformQuantizationInfo boxes_qinfo = boxes_in->quantization_info().uniform();
        ARM_COMPUTE_RETURN_ERROR_ON(boxes_qinfo.scale != 0.125f);
        ARM_COMPUTE_RETURN_ERROR_ON(boxes_qinfo.offset != 0);
    }

    return Status{};
}

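// For quantized configurations: dequantize the inputs, run the kernel on the F32
// intermediates, then quantize the results back into the caller's outputs.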
void CPPBoxWithNonMaximaSuppressionLimit::run()
{
    // Acquire all the temporaries
    MemoryGroupResourceScope scope_mg(_memory_group);

    if(_is_qasymm8)
    {
        dequantize_tensor(_scores_in, &_scores_in_f32);
        dequantize_tensor(_boxes_in, &_boxes_in_f32);
        if(_batch_splits_in != nullptr)
        {
            dequantize_tensor(_batch_splits_in, &_batch_splits_in_f32);
        }
    }

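    // Run the box-with-NMS-limit kernel; the scheduler splits the work along the Y dimension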
    Scheduler::get().schedule(&_box_with_nms_limit_kernel, Window::DimY);

    if(_is_qasymm8)
    {
        quantize_tensor(&_scores_out_f32, _scores_out);
        quantize_tensor(&_boxes_out_f32, _boxes_out);
        quantize_tensor(&_classes_f32, _classes);
        if(_batch_splits_out != nullptr)
        {
            quantize_tensor(&_batch_splits_out_f32, _batch_splits_out);
        }
        if(_keeps != nullptr)
        {
            quantize_tensor(&_keeps_f32, _keeps);
        }
    }
}
} // namespace arm_compute