// Copyright 2022 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <array>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <memory>
#include <numeric>
#include <random>

#include <xnnpack.h>
#include <xnnpack/node-type.h>
#include <xnnpack/operator.h>
#include <xnnpack/subgraph.h>

#include <gtest/gtest.h>

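// Reference sketch of the transform under test (illustrative only; the tests
// below compare the operator API against the subgraph API and do not call
// this helper). It assumes the depth-to-space semantics XNNPACK implements
// match TensorFlow's DEPTH_TO_SPACE in NHWC layout with DCR channel order:
//   output[n][iy*bs + by][ix*bs + bx][oc] = input[n][iy][ix][(by*bs + bx)*Cout + oc]
template <typename T>
void ReferenceDepthToSpace(
  const T* input, T* output, size_t batch, size_t input_height, size_t input_width,
  size_t output_channels, uint32_t block_size)
{
  const size_t input_channels = block_size * block_size * output_channels;
  const size_t output_height = input_height * block_size;
  const size_t output_width = input_width * block_size;
  for (size_t n = 0; n < batch; n++) {
    for (size_t iy = 0; iy < input_height; iy++) {
      for (size_t ix = 0; ix < input_width; ix++) {
        for (size_t by = 0; by < block_size; by++) {
          for (size_t bx = 0; bx < block_size; bx++) {
            for (size_t oc = 0; oc < output_channels; oc++) {
              // Each input pixel's channels are scattered over a
              // block_size x block_size patch of output pixels.
              const size_t oy = iy * block_size + by;
              const size_t ox = ix * block_size + bx;
              const size_t ic = (by * block_size + bx) * output_channels + oc;
              output[((n * output_height + oy) * output_width + ox) * output_channels + oc] =
                input[((n * input_height + iy) * input_width + ix) * input_channels + ic];
            }
          }
        }
      }
    }
  }
}
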
template <typename T> class DepthToSpaceTest : public ::testing::Test {
protected:
  DepthToSpaceTest()
  {
    random_device = std::unique_ptr<std::random_device>(new std::random_device());
    rng = std::mt19937((*random_device)());
    dim_dist = std::uniform_int_distribution<size_t>(1, 9);
    i8dist =
      std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max());
    u8dist =
      std::uniform_int_distribution<int32_t>(std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max());
    scale_dist = std::uniform_real_distribution<float>(0.1f, 10.0f);
    f32dist = std::uniform_real_distribution<float>(0.01f, 1.0f);

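    // Derive consistent NHWC shapes: depth-to-space consumes
    // [N, H, W, block_size * block_size * C] and produces
    // [N, H * block_size, W * block_size, C].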
    input_dims = RandomShape(4);
    block_size = std::uniform_int_distribution<uint32_t>(2, 10)(rng);
    uint32_t output_channels = dim_dist(rng);
    output_dims = {input_dims[0], input_dims[1] * block_size, input_dims[2] * block_size, output_channels};
    input_dims[3] = block_size * block_size * output_channels;

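    // Pad the input with XNN_EXTRA_BYTES of slack so that vectorized kernels
    // may safely read (but not use) a few elements past the end of the data.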
    size_t num_output_elements = NumElements(output_dims);
    input = std::vector<T>(NumElements(input_dims) + XNN_EXTRA_BYTES / sizeof(T));
    operator_output = std::vector<T>(num_output_elements);
    subgraph_output = std::vector<T>(num_output_elements);
  }

  size_t NumElements(std::vector<size_t>& dims)
  {
    return std::accumulate(dims.begin(), dims.end(), size_t(1), std::multiplies<size_t>());
  }

  std::vector<size_t> RandomShape(size_t num_dims)
  {
    std::vector<size_t> dims(num_dims);
    std::generate(dims.begin(), dims.end(), [&] { return dim_dist(rng); });
    return dims;
  }

  size_t batch_size()
  {
    assert(input_dims[0] == output_dims[0]);
    return input_dims[0];
  }

  size_t input_height() { return input_dims[1]; }
  size_t input_width() { return input_dims[2]; }
  size_t input_channel() { return input_dims[3]; }
  size_t output_channel() { return output_dims[3]; }

  std::unique_ptr<std::random_device> random_device;
  std::mt19937 rng;
  std::uniform_int_distribution<size_t> dim_dist;
  std::uniform_real_distribution<float> scale_dist;
  std::uniform_int_distribution<int32_t> i8dist;
  std::uniform_int_distribution<int32_t> u8dist;
  std::uniform_real_distribution<float> f32dist;

  std::vector<size_t> input_dims;
  std::vector<size_t> output_dims;

  std::vector<T> input;
  std::vector<T> operator_output;
  std::vector<T> subgraph_output;

  uint32_t block_size;

  uint32_t input_id;
  uint32_t output_id;
};

using DepthToSpaceTestQS8 = DepthToSpaceTest<int8_t>;
using DepthToSpaceTestQU8 = DepthToSpaceTest<uint8_t>;
using DepthToSpaceTestF32 = DepthToSpaceTest<float>;

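// The define tests exercise only graph construction: they define input and
// output values, add a depth-to-space node, and then inspect the subgraph's
// single node for the expected type, compute type, I/O wiring, and flags.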
TEST_F(DepthToSpaceTestQS8, define)
{
  const int32_t input_zero_point = i8dist(rng);
  const float input_scale = scale_dist(rng);
  const int32_t output_zero_point = input_zero_point;
  const float output_scale = input_scale;

  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(/*external_value_ids=*/2, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint8, input_zero_point, input_scale, input_dims.size(),
                          input_dims.data(), nullptr, 0, /*flags=*/XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint8, output_zero_point, output_scale, output_dims.size(),
                          output_dims.data(), nullptr, 1, /*flags=*/XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(xnn_status_success, xnn_define_depth_to_space(subgraph, input_id, output_id, block_size, /*flags=*/0));

  ASSERT_EQ(subgraph->num_nodes, 1);
  const struct xnn_node* node = &subgraph->nodes[0];
  ASSERT_EQ(node->type, xnn_node_type_depth_to_space);
  ASSERT_EQ(node->compute_type, xnn_compute_type_qs8);
  ASSERT_EQ(node->num_inputs, 1);
  ASSERT_EQ(node->inputs[0], input_id);
  ASSERT_EQ(node->num_outputs, 1);
  ASSERT_EQ(node->outputs[0], output_id);
  ASSERT_EQ(node->flags, 0);
}

TEST_F(DepthToSpaceTestQU8, define)
{
  const int32_t input_zero_point = u8dist(rng);
  const float input_scale = scale_dist(rng);
  const int32_t output_zero_point = input_zero_point;
  const float output_scale = input_scale;

  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(/*external_value_ids=*/2, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_quint8, input_zero_point, input_scale, input_dims.size(),
                          input_dims.data(), nullptr, 0, /*flags=*/XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_quint8, output_zero_point, output_scale, output_dims.size(),
                          output_dims.data(), nullptr, 1, /*flags=*/XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(xnn_status_success, xnn_define_depth_to_space(subgraph, input_id, output_id, block_size, /*flags=*/0));

  ASSERT_EQ(subgraph->num_nodes, 1);
  const struct xnn_node* node = &subgraph->nodes[0];
  ASSERT_EQ(node->type, xnn_node_type_depth_to_space);
  ASSERT_EQ(node->compute_type, xnn_compute_type_qu8);
  ASSERT_EQ(node->num_inputs, 1);
  ASSERT_EQ(node->inputs[0], input_id);
  ASSERT_EQ(node->num_outputs, 1);
  ASSERT_EQ(node->outputs[0], output_id);
  ASSERT_EQ(node->flags, 0);
}

TEST_F(DepthToSpaceTestF32, define)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(/*external_value_ids=*/2, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, input_dims.size(), input_dims.data(), nullptr, 0,
                          /*flags=*/XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, output_dims.size(), output_dims.data(), nullptr, 1,
                          /*flags=*/XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(xnn_status_success, xnn_define_depth_to_space(subgraph, input_id, output_id, block_size, /*flags=*/0));

  ASSERT_EQ(subgraph->num_nodes, 1);
  const struct xnn_node* node = &subgraph->nodes[0];
  ASSERT_EQ(node->type, xnn_node_type_depth_to_space);
  ASSERT_EQ(node->compute_type, xnn_compute_type_fp32);
  ASSERT_EQ(node->num_inputs, 1);
  ASSERT_EQ(node->inputs[0], input_id);
  ASSERT_EQ(node->num_outputs, 1);
  ASSERT_EQ(node->outputs[0], output_id);
  ASSERT_EQ(node->flags, 0);
}

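// The matches_operator_api tests run the same randomized depth-to-space
// problem through both the direct operator API and the subgraph runtime and
// require bit-identical outputs. The three leading size_t arguments of the
// operator-API create call are output channels and the input/output channel
// strides; with the dense NHWC tensors used here, each stride equals the
// corresponding channel count.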
TEST_F(DepthToSpaceTestQS8, matches_operator_api)
{
  const int32_t input_zero_point = i8dist(rng);
  const float input_scale = scale_dist(rng);
  const int32_t output_zero_point = input_zero_point;
  const float output_scale = input_scale;
  std::generate(input.begin(), input.end(), [&]() { return i8dist(rng); });
  std::fill(operator_output.begin(), operator_output.end(), INT8_C(0xA5));
  std::fill(subgraph_output.begin(), subgraph_output.end(), INT8_C(0xA5));

  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  // Call operator API.
  xnn_operator_t op = nullptr;
  const xnn_status status = xnn_create_depth_to_space_nhwc_x8(
    output_channel(), /*input_channel_stride=*/input_channel(), /*output_channel_stride=*/output_channel(),
    block_size, /*flags=*/0, &op);
  if (status == xnn_status_unsupported_hardware) {
    GTEST_SKIP();
  }
  ASSERT_EQ(xnn_status_success, status);
  ASSERT_NE(nullptr, op);
  std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_op(op, xnn_delete_operator);

  ASSERT_EQ(
    xnn_status_success,
    xnn_setup_depth_to_space_nhwc_x8(
      op, batch_size(), input_height(), input_width(), input.data(), operator_output.data(), /*threadpool=*/nullptr));
  ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr));

  // Call subgraph API.
  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(/*external_value_ids=*/2, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);
  input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_qint8, input_zero_point, input_scale, input_dims.size(), input_dims.data(), nullptr,
      /*external_id=*/0, /*flags=*/XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_qint8, output_zero_point, output_scale, output_dims.size(), output_dims.data(), nullptr,
      /*external_id=*/1, /*flags=*/XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(xnn_status_success, xnn_define_depth_to_space(subgraph, input_id, output_id, block_size, /*flags=*/0));

  xnn_runtime_t runtime = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime));
  ASSERT_NE(nullptr, runtime);
  std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(runtime, xnn_delete_runtime);

  std::array<xnn_external_value, 2> external = {
    xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}};
  ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data()));
  ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime));

  ASSERT_EQ(subgraph_output, operator_output);
}

TEST_F(DepthToSpaceTestQU8, matches_operator_api)
{
  const int32_t input_zero_point = u8dist(rng);
  const float input_scale = scale_dist(rng);
  const int32_t output_zero_point = input_zero_point;
  const float output_scale = input_scale;
  std::generate(input.begin(), input.end(), [&]() { return u8dist(rng); });
  std::fill(operator_output.begin(), operator_output.end(), UINT8_C(0xA5));
  std::fill(subgraph_output.begin(), subgraph_output.end(), UINT8_C(0xA5));

  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  // Call operator API.
  xnn_operator_t op = nullptr;
  const xnn_status status = xnn_create_depth_to_space_nhwc_x8(
    output_channel(), /*input_channel_stride=*/input_channel(), /*output_channel_stride=*/output_channel(),
    block_size, /*flags=*/0, &op);
  if (status == xnn_status_unsupported_hardware) {
    GTEST_SKIP();
  }
  ASSERT_EQ(xnn_status_success, status);
  ASSERT_NE(nullptr, op);
  std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_op(op, xnn_delete_operator);

  ASSERT_EQ(
    xnn_status_success,
    xnn_setup_depth_to_space_nhwc_x8(
      op, batch_size(), input_height(), input_width(), input.data(), operator_output.data(), /*threadpool=*/nullptr));
  ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr));

  // Call subgraph API.
  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(/*external_value_ids=*/2, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);
  input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_quint8, input_zero_point, input_scale, input_dims.size(), input_dims.data(), nullptr,
      /*external_id=*/0, /*flags=*/XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_quint8, output_zero_point, output_scale, output_dims.size(), output_dims.data(), nullptr,
      /*external_id=*/1, /*flags=*/XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(xnn_status_success, xnn_define_depth_to_space(subgraph, input_id, output_id, block_size, /*flags=*/0));

  xnn_runtime_t runtime = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime));
  ASSERT_NE(nullptr, runtime);
  std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(runtime, xnn_delete_runtime);

  std::array<xnn_external_value, 2> external = {
    xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}};
  ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data()));
  ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime));

  ASSERT_EQ(subgraph_output, operator_output);
}

TEST_F(DepthToSpaceTestF32, matches_operator_api)
{
  // Use a wider value range than the fixture's default f32dist.
  std::uniform_real_distribution<float> f32dist(-255.0f, 255.0f);
  std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); });
  std::fill(operator_output.begin(), operator_output.end(), std::nanf(""));
  std::fill(subgraph_output.begin(), subgraph_output.end(), std::nanf(""));

  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  // Call operator API.
  xnn_operator_t op = nullptr;
  const xnn_status status = xnn_create_depth_to_space_nhwc_x32(
    output_channel(), /*input_channel_stride=*/input_channel(), /*output_channel_stride=*/output_channel(),
    block_size, /*flags=*/0, &op);
  if (status == xnn_status_unsupported_hardware) {
    GTEST_SKIP();
  }
  ASSERT_EQ(xnn_status_success, status);
  ASSERT_NE(nullptr, op);
  std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_op(op, xnn_delete_operator);

  ASSERT_EQ(
    xnn_status_success,
    xnn_setup_depth_to_space_nhwc_x32(
      op, batch_size(), input_height(), input_width(), input.data(), operator_output.data(), /*threadpool=*/nullptr));
  ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr));

  // Call subgraph API.
  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(/*external_value_ids=*/2, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);
  input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, input_dims.size(), input_dims.data(), nullptr, /*external_id=*/0,
                          /*flags=*/XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_tensor_value(
      subgraph, xnn_datatype_fp32, output_dims.size(), output_dims.data(), nullptr, /*external_id=*/1,
      /*flags=*/XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(xnn_status_success, xnn_define_depth_to_space(subgraph, input_id, output_id, block_size, /*flags=*/0));

  xnn_runtime_t runtime = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime));
  ASSERT_NE(nullptr, runtime);
  std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(runtime, xnn_delete_runtime);

  std::array<xnn_external_value, 2> external = {
    xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}};
  ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data()));
  ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime));

  ASSERT_EQ(subgraph_output, operator_output);
}