1 // Copyright 2021 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 //
6 // Auto-generated file. Do not edit!
7 // Specification: test/x16-transpose.yaml
8 // Generator: tools/generate-transpose-test.py
9
10
11 #include <gtest/gtest.h>
12
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15
16 #include <xnnpack/transpose.h>
17 #include "transpose-microkernel-tester.h"
18
19
TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_1_bw_2) {
  // Runs the 1x2 scalar-int kernel once on a block of width 2 and height 1.
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(2)
      .block_width(2).block_height(1)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
}
30
TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_1_2_bw_1_4) {
  // Sweeps every block height in [1, 2] against every block width in [1, 4];
  // the strides are scaled from the current width/height.
  for (size_t height = 1; height <= 2; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(2).iterations(1)
          .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
    }
  }
}
45
TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_1_bw_4) {
  // Block of width 4 and height 1 with input_stride 4 and output_stride 1.
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(1)
      .block_width(4).block_height(1)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
}
56
TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_1_bw_3_4) {
  // Widths in [3, 4) at height 1; the upper bound is exclusive, so only
  // width 3 runs. input_stride tracks the width.
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(2)
        .block_width(width).block_height(1)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
  }
}
69
TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_2_bw_3_4) {
  // Widths in [3, 4) at height 2; the upper bound is exclusive, so only
  // width 3 runs. input_stride tracks the width.
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(2)
        .block_width(width).block_height(2)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
  }
}
82
TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_2_bw_2) {
  // Block of width 2 and height 2 with input_stride 2 and output_stride 7.
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(7)
      .block_width(2).block_height(2)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
}
93
TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_2_2_bw_2) {
  // Inclusive upper bound: with `height < 2` and a start of 2 the loop ran
  // zero times and the test passed without ever calling the kernel. Now
  // block_height == 2 is exercised (output_stride tracks the height).
  // NOTE(review): the test name says bw_2 but block_width is 5.
  // NOTE(review): this file is generated; the same fix presumably belongs in
  // tools/generate-transpose-test.py.
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester()
        .input_stride(19).output_stride(height)
        .block_width(5).block_height(height)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
  }
}
106
TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_2_2_bw_4) {
  // Inclusive upper bound: with `height < 2` and a start of 2 the loop ran
  // zero times and the test passed without ever calling the kernel. Now
  // block_height == 2 is exercised with a width-4 block.
  // NOTE(review): this file is generated; the same fix presumably belongs in
  // tools/generate-transpose-test.py.
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester()
        .input_stride(4).output_stride(height)
        .block_width(4).block_height(height)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
  }
}
119
TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_2_2_bw_3_4) {
  // Inclusive upper bound on the height loop: with `height < 2` and a start
  // of 2 the outer loop ran zero times, so nothing was tested. The width loop
  // keeps its exclusive bound (only width 3 runs).
  // NOTE(review): this file is generated; the same fix presumably belongs in
  // tools/generate-transpose-test.py.
  for (size_t height = 2; height <= 2; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(2).iterations(1)
          .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
    }
  }
}
134
TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_1_bw_2_is_4) {
  // Width-2, height-1 block where input_stride (4) exceeds the block width.
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(1)
      .block_width(2).block_height(1)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
}
145
TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_1_bw_2_os_2) {
  // Width-2, height-1 block where output_stride (2) exceeds the block height.
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(2)
      .block_width(2).block_height(1)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
}
156
TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_1_bw_2_is_4_os_2) {
  // Width-2, height-1 block with both strides larger than the block
  // dimensions (input_stride 4, output_stride 2).
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(2)
      .block_width(2).block_height(1)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
}
167
TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_17_bw_38_ies_13) {
  // Larger 38-wide, 17-high block with a custom input_element_stride of 13.
  TransposeMicrokernelTester()
      .input_stride(38).output_stride(17)
      .block_width(38).block_height(17)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
}
179
TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_3_bw_10_oes_13) {
  // 10-wide, 3-high block with a custom output_element_stride of 13.
  TransposeMicrokernelTester()
      .input_stride(10).output_stride(3)
      .block_width(10).block_height(3)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
}
191
TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_7_bw_46_ies_19_oes_15) {
  // 46-wide, 7-high block with custom element strides on both sides
  // (input 19, output 15) and strides larger than the block dimensions.
  TransposeMicrokernelTester()
      .input_stride(51).output_stride(13)
      .block_width(46).block_height(7)
      .element_size(2)
      .input_element_stride(19).output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
}
204
TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_1_bw_4) {
  // Runs the 1x4 scalar-int kernel once on a block of width 4 and height 1.
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(2)
      .block_width(4).block_height(1)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
}
215
TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_1_2_bw_1_8) {
  // Sweeps every block height in [1, 2] against every block width in [1, 8];
  // the strides are scaled from the current width/height.
  for (size_t height = 1; height <= 2; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(2).iterations(1)
          .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
    }
  }
}
230
TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_1_bw_8) {
  // Block of width 8 and height 1 with input_stride 8 and output_stride 1.
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(1)
      .block_width(8).block_height(1)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
}
241
TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_1_bw_5_8) {
  // Widths 5, 6 and 7 (upper bound 8 is exclusive) at height 1;
  // input_stride tracks the width.
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(2)
        .block_width(width).block_height(1)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
  }
}
254
TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_2_bw_5_8) {
  // Widths 5, 6 and 7 (upper bound 8 is exclusive) at height 2;
  // input_stride tracks the width.
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(2)
        .block_width(width).block_height(2)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
  }
}
267
TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_2_bw_4) {
  // Block of width 4 and height 2 with input_stride 4 and output_stride 7.
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(7)
      .block_width(4).block_height(2)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
}
278
TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_2_2_bw_4) {
  // Inclusive upper bound: with `height < 2` and a start of 2 the loop ran
  // zero times and the test passed without ever calling the kernel. Now
  // block_height == 2 is exercised (output_stride tracks the height).
  // NOTE(review): the test name says bw_4 but block_width is 7.
  // NOTE(review): this file is generated; the same fix presumably belongs in
  // tools/generate-transpose-test.py.
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester()
        .input_stride(21).output_stride(height)
        .block_width(7).block_height(height)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
  }
}
291
TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_2_2_bw_8) {
  // Inclusive upper bound: with `height < 2` and a start of 2 the loop ran
  // zero times and the test passed without ever calling the kernel. Now
  // block_height == 2 is exercised with a width-8 block.
  // NOTE(review): this file is generated; the same fix presumably belongs in
  // tools/generate-transpose-test.py.
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester()
        .input_stride(8).output_stride(height)
        .block_width(8).block_height(height)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
  }
}
304
TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_2_2_bw_5_8) {
  // Inclusive upper bound on the height loop: with `height < 2` and a start
  // of 2 the outer loop ran zero times, so nothing was tested. The width loop
  // keeps its exclusive bound (widths 5, 6 and 7).
  // NOTE(review): this file is generated; the same fix presumably belongs in
  // tools/generate-transpose-test.py.
  for (size_t height = 2; height <= 2; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(2).iterations(1)
          .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
    }
  }
}
319
TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_1_bw_4_is_8) {
  // Width-4, height-1 block where input_stride (8) exceeds the block width.
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(1)
      .block_width(4).block_height(1)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
}
330
TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_1_bw_4_os_2) {
  // Width-4, height-1 block where output_stride (2) exceeds the block height.
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(2)
      .block_width(4).block_height(1)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
}
341
TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_1_bw_4_is_8_os_2) {
  // Width-4, height-1 block with both strides larger than the block
  // dimensions (input_stride 8, output_stride 2).
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(2)
      .block_width(4).block_height(1)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
}
352
TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_17_bw_76_ies_13) {
  // Larger 76-wide, 17-high block with a custom input_element_stride of 13.
  TransposeMicrokernelTester()
      .input_stride(76).output_stride(17)
      .block_width(76).block_height(17)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
}
364
TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_3_bw_20_oes_13) {
  // 20-wide, 3-high block with a custom output_element_stride of 13.
  TransposeMicrokernelTester()
      .input_stride(20).output_stride(3)
      .block_width(20).block_height(3)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
}
376
TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_7_bw_92_ies_19_oes_15) {
  // 92-wide, 7-high block with custom element strides on both sides
  // (input 19, output 15) and strides larger than the block dimensions.
  TransposeMicrokernelTester()
      .input_stride(97).output_stride(13)
      .block_width(92).block_height(7)
      .element_size(2)
      .input_element_stride(19).output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
}
389
TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_2_bw_1) {
  // Runs the 2x1 scalar-int kernel once on a block of width 1 and height 2.
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(4)
      .block_width(1).block_height(2)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
}
400
TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_1_4_bw_1_2) {
  // Sweeps every block height in [1, 4] against every block width in [1, 2];
  // the strides are scaled from the current width/height.
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 2; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(2).iterations(1)
          .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
    }
  }
}
415
TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_2_bw_2) {
  // Block of width 2 and height 2 with both strides equal to 2.
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(2)
      .block_width(2).block_height(2)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
}
426
TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_2_bw_2_2) {
  // Inclusive upper bound: with `width < 2` and a start of 2 the loop ran
  // zero times and the test passed without ever calling the kernel. Now
  // block_width == 2 is exercised at height 2 (input_stride tracks the width).
  // NOTE(review): this file is generated; the same fix presumably belongs in
  // tools/generate-transpose-test.py.
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(4)
        .block_width(width).block_height(2)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
  }
}
439
TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_4_bw_2_2) {
  // Inclusive upper bound: with `width < 2` and a start of 2 the loop ran
  // zero times and the test passed without ever calling the kernel. Now
  // block_width == 2 is exercised at height 4 (input_stride tracks the width).
  // NOTE(review): this file is generated; the same fix presumably belongs in
  // tools/generate-transpose-test.py.
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(4)
        .block_width(width).block_height(4)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
  }
}
452
TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_4_bw_1) {
  // Block of width 1 and height 4 with input_stride 1 and output_stride 10.
  TransposeMicrokernelTester()
      .input_stride(1).output_stride(10)
      .block_width(1).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
}
463
TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_3_4_bw_1) {
  // Heights in [3, 4) — the bound is exclusive, so only height 3 runs;
  // output_stride tracks the height.
  // NOTE(review): the test name says bw_1 but block_width is 4.
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(18).output_stride(height)
        .block_width(4).block_height(height)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
  }
}
476
TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_3_4_bw_2) {
  // Heights in [3, 4) — the bound is exclusive, so only height 3 runs —
  // with a width-2 block; output_stride tracks the height.
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(2).output_stride(height)
        .block_width(2).block_height(height)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
  }
}
489
TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_3_4_bw_2_2) {
  // Inclusive upper bound on the width loop: with `width < 2` and a start of
  // 2 the inner loop ran zero times, so nothing was tested. The height loop
  // keeps its exclusive bound (only height 3 runs).
  // NOTE(review): this file is generated; the same fix presumably belongs in
  // tools/generate-transpose-test.py.
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 2; width <= 2; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(2).iterations(1)
          .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
    }
  }
}
504
TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_2_bw_1_is_2) {
  // Width-1, height-2 block where input_stride (2) exceeds the block width.
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(2)
      .block_width(1).block_height(2)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
}
515
TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_2_bw_1_os_4) {
  // Width-1, height-2 block where output_stride (4) exceeds the block height.
  TransposeMicrokernelTester()
      .input_stride(1).output_stride(4)
      .block_width(1).block_height(2)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
}
526
TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_2_bw_1_is_2_os_4) {
  // Width-1, height-2 block with both strides larger than the block
  // dimensions (input_stride 2, output_stride 4).
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(4)
      .block_width(1).block_height(2)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
}
537
TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_34_bw_19_ies_13) {
  // Larger 19-wide, 34-high block with a custom input_element_stride of 13.
  TransposeMicrokernelTester()
      .input_stride(19).output_stride(34)
      .block_width(19).block_height(34)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
}
549
TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_6_bw_5_oes_13) {
  // 5-wide, 6-high block with a custom output_element_stride of 13.
  TransposeMicrokernelTester()
      .input_stride(5).output_stride(6)
      .block_width(5).block_height(6)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
}
561
TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_14_bw_23_ies_19_oes_15) {
  // 23-wide, 14-high block with custom element strides on both sides
  // (input 19, output 15) and strides larger than the block dimensions.
  TransposeMicrokernelTester()
      .input_stride(28).output_stride(20)
      .block_width(23).block_height(14)
      .element_size(2)
      .input_element_stride(19).output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
}
574
TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_2_bw_2) {
  // Runs the 2x2 scalar-int kernel once on a block of width 2 and height 2.
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
}
585
TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_1_4_bw_1_4) {
  // Sweeps every block height in [1, 4] against every block width in [1, 4];
  // the strides are scaled from the current width/height.
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(2).iterations(1)
          .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
    }
  }
}
600
TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_2_bw_4) {
  // Block of width 4 and height 2 with input_stride 4 and output_stride 2.
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(2)
      .block_width(4).block_height(2)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
}
611
TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_2_bw_3_4) {
  // Widths in [3, 4) at height 2; the upper bound is exclusive, so only
  // width 3 runs. input_stride tracks the width.
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(4)
        .block_width(width).block_height(2)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
  }
}
624
TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_4_bw_3_4) {
  // Widths in [3, 4) at height 4; the upper bound is exclusive, so only
  // width 3 runs. input_stride tracks the width.
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(4)
        .block_width(width).block_height(4)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
  }
}
637
TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_4_bw_2) {
  // Block of width 2 and height 4 with input_stride 2 and output_stride 10.
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(10)
      .block_width(2).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
}
648
TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_3_4_bw_2) {
  // Heights in [3, 4) — the bound is exclusive, so only height 3 runs;
  // output_stride tracks the height.
  // NOTE(review): the test name says bw_2 but block_width is 5.
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(19).output_stride(height)
        .block_width(5).block_height(height)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
  }
}
661
TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_3_4_bw_4) {
  // Heights in [3, 4) — the bound is exclusive, so only height 3 runs —
  // with a width-4 block; output_stride tracks the height.
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(4).output_stride(height)
        .block_width(4).block_height(height)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
  }
}
674
TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_3_4_bw_3_4) {
  // Heights in [3, 4) crossed with widths in [3, 4); both bounds are
  // exclusive, so only the 3x3 case runs. Strides track the block dimensions.
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(2).iterations(1)
          .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
    }
  }
}
689
TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_2_bw_2_is_4) {
  // Width-2, height-2 block where input_stride (4) exceeds the block width.
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(2)
      .block_width(2).block_height(2)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
}
700
TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_2_bw_2_os_4) {
  // Width-2, height-2 block where output_stride (4) exceeds the block height.
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
}
711
TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_2_bw_2_is_4_os_4) {
  // Width-2, height-2 block with both strides (4) larger than the block
  // dimensions.
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
}
722
TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_34_bw_38_ies_13) {
  // Larger 38-wide, 34-high block with a custom input_element_stride of 13.
  TransposeMicrokernelTester()
      .input_stride(38).output_stride(34)
      .block_width(38).block_height(34)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
}
734
TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_6_bw_10_oes_13) {
  // 10-wide, 6-high block with a custom output_element_stride of 13.
  TransposeMicrokernelTester()
      .input_stride(10).output_stride(6)
      .block_width(10).block_height(6)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
}
746
TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_14_bw_46_ies_19_oes_15) {
  // 46-wide, 14-high block with custom element strides on both sides
  // (input 19, output 15) and strides larger than the block dimensions.
  TransposeMicrokernelTester()
      .input_stride(51).output_stride(20)
      .block_width(46).block_height(14)
      .element_size(2)
      .input_element_stride(19).output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
}
759
TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_2_bw_4) {
  // Runs the 2x4 scalar-int kernel once on a block of width 4 and height 2.
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(4).block_height(2)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
}
770
TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_1_4_bw_1_8) {
  // Sweeps every block height in [1, 4] against every block width in [1, 8];
  // the strides are scaled from the current width/height.
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(2).iterations(1)
          .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
    }
  }
}
785
TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_2_bw_8) {
  // Block of width 8 and height 2 with input_stride 8 and output_stride 2.
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(2)
      .block_width(8).block_height(2)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
}
796
TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_2_bw_5_8) {
  // Widths 5, 6 and 7 (upper bound 8 is exclusive) at height 2;
  // input_stride tracks the width.
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(4)
        .block_width(width).block_height(2)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
  }
}
809
TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_4_bw_5_8) {
  // Widths 5, 6 and 7 (upper bound 8 is exclusive) at height 4;
  // input_stride tracks the width.
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(4)
        .block_width(width).block_height(4)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
  }
}
822
TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_4_bw_4) {
  // Block of width 4 and height 4 with input_stride 4 and output_stride 10.
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(10)
      .block_width(4).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
}
833
TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_3_4_bw_4) {
  // Heights in [3, 4) — the bound is exclusive, so only height 3 runs;
  // output_stride tracks the height.
  // NOTE(review): the test name says bw_4 but block_width is 7.
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(21).output_stride(height)
        .block_width(7).block_height(height)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
  }
}
846
TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_3_4_bw_8) {
  // Heights in [3, 4) — the bound is exclusive, so only height 3 runs —
  // with a width-8 block; output_stride tracks the height.
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(8).output_stride(height)
        .block_width(8).block_height(height)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
  }
}
859
TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_3_4_bw_5_8) {
  // Heights in [3, 4) crossed with widths in [5, 8); both bounds are
  // exclusive, so height 3 runs with widths 5, 6 and 7. Strides track the
  // block dimensions.
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(2).iterations(1)
          .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
    }
  }
}
874
TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_2_bw_4_is_8) {
  // Width-4, height-2 block where input_stride (8) exceeds the block width.
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(2)
      .block_width(4).block_height(2)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
}
885
TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_2_bw_4_os_4) {
  // Width-4, height-2 block where output_stride (4) exceeds the block height.
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(4)
      .block_width(4).block_height(2)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
}
896
TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_2_bw_4_is_8_os_4) {
  // Width-4, height-2 block with both strides larger than the block
  // dimensions (input_stride 8, output_stride 4).
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(4).block_height(2)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
}
907
TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_34_bw_76_ies_13) {
  // Larger 76-wide, 34-high block with a custom input_element_stride of 13.
  TransposeMicrokernelTester()
      .input_stride(76).output_stride(34)
      .block_width(76).block_height(34)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
}
919
TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_6_bw_20_oes_13) {
  // 20-wide, 6-high block with a custom output_element_stride of 13.
  TransposeMicrokernelTester()
      .input_stride(20).output_stride(6)
      .block_width(20).block_height(6)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
}
931
TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_14_bw_92_ies_19_oes_15) {
  // 92-wide, 14-high block with custom element strides on both sides
  // (input 19, output 15) and strides larger than the block dimensions.
  TransposeMicrokernelTester()
      .input_stride(97).output_stride(20)
      .block_width(92).block_height(14)
      .element_size(2)
      .input_element_stride(19).output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
}
944
TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_4_bw_1) {
  // Runs the 4x1 scalar-int kernel once on a block of width 1 and height 4.
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(8)
      .block_width(1).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
}
955
TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_1_8_bw_1_2) {
  // Sweeps every block height in [1, 8] against every block width in [1, 2];
  // the strides are scaled from the current width/height.
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 2; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(2).iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
    }
  }
}
970
TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_4_bw_2) {
  // Block of width 2 and height 4 with input_stride 2 and output_stride 4.
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(4)
      .block_width(2).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
}
981
TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_4_bw_2_2) {
  // Inclusive upper bound: with `width < 2` and a start of 2 the loop ran
  // zero times and the test passed without ever calling the kernel. Now
  // block_width == 2 is exercised at height 4 (input_stride tracks the width).
  // NOTE(review): this file is generated; the same fix presumably belongs in
  // tools/generate-transpose-test.py.
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(8)
        .block_width(width).block_height(4)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
  }
}
994
TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_8_bw_2_2) {
  // Inclusive upper bound: with `width < 2` and a start of 2 the loop ran
  // zero times and the test passed without ever calling the kernel. Now
  // block_width == 2 is exercised at height 8 (input_stride tracks the width).
  // NOTE(review): this file is generated; the same fix presumably belongs in
  // tools/generate-transpose-test.py.
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(8)
        .block_width(width).block_height(8)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
  }
}
1007
TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_8_bw_1) {
  // Block of width 1 and height 8 with input_stride 1 and output_stride 16.
  TransposeMicrokernelTester()
      .input_stride(1).output_stride(16)
      .block_width(1).block_height(8)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
}
1018
TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_5_8_bw_1) {
  // Heights 5, 6 and 7 (upper bound 8 is exclusive); output_stride tracks
  // the height.
  // NOTE(review): the test name says bw_1 but block_width is 4.
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(18).output_stride(height)
        .block_width(4).block_height(height)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
  }
}
1031
TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_5_8_bw_2) {
  // Heights 5, 6 and 7 (upper bound 8 is exclusive) with a width-2 block;
  // output_stride tracks the height.
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(2).output_stride(height)
        .block_width(2).block_height(height)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
  }
}
1044
TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_5_8_bw_2_2) {
  // Inclusive upper bound on the width loop: with `width < 2` and a start of
  // 2 the inner loop ran zero times, so nothing was tested. The height loop
  // keeps its exclusive bound (heights 5, 6 and 7).
  // NOTE(review): this file is generated; the same fix presumably belongs in
  // tools/generate-transpose-test.py.
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 2; width <= 2; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(2).iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
    }
  }
}
1059
TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_4_bw_1_is_2) {
  // Width-1, height-4 block where input_stride (2) exceeds the block width.
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(4)
      .block_width(1).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
}
1070
TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_4_bw_1_os_8) {
  // Width-1, height-4 block where output_stride (8) exceeds the block height.
  TransposeMicrokernelTester()
      .input_stride(1).output_stride(8)
      .block_width(1).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
}
1081
TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_4_bw_1_is_2_os_8) {
  // Width-1, height-4 block with both strides larger than the block
  // dimensions (input_stride 2, output_stride 8).
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(8)
      .block_width(1).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
}
1092
// Large block (height 68, width 19) with input_element_stride set to 13.
// Strides equal the block dimensions.
TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_68_bw_19_ies_13) {
  TransposeMicrokernelTester tester;
  tester.input_stride(19).output_stride(68);
  tester.block_width(19).block_height(68);
  tester.element_size(2).input_element_stride(13).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
}
1104
// Block of height 12 and width 5 with output_element_stride set to 13.
// Strides equal the block dimensions.
TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_12_bw_5_oes_13) {
  TransposeMicrokernelTester tester;
  tester.input_stride(5).output_stride(12);
  tester.block_width(5).block_height(12);
  tester.element_size(2).output_element_stride(13).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
}
1116
// Block of height 28 and width 23 with both element strides overridden
// (input 19, output 15). input_stride (28) and output_stride (34) are larger
// than block_width and block_height respectively.
TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_28_bw_23_ies_19_oes_15) {
  TransposeMicrokernelTester tester;
  tester.input_stride(28).output_stride(34);
  tester.block_width(23).block_height(28);
  tester.element_size(2).input_element_stride(19).output_element_stride(15).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
}
1129
// One block of height 4 and width 2; input_stride (4) is twice block_width
// and output_stride (8) is twice block_height.
TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_4_bw_2) {
  TransposeMicrokernelTester tester;
  tester.input_stride(4).output_stride(8);
  tester.block_width(2).block_height(4);
  tester.element_size(2).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
}
1140
// Full sweep of block_height 1..8 and block_width 1..4 (both inclusive), with
// input_stride = 3 * width and output_stride = 7 * height.
TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_1_8_bw_1_4) {
  for (size_t height = 1; height <= 8; height++) {
    for (size_t width = 1; width <= 4; width++) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width * 3).output_stride(height * 7);
      tester.block_width(width).block_height(height);
      tester.element_size(2).iterations(1);
      tester.Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
    }
  }
}
1155
// One 4x4 block; both strides equal the block dimensions.
TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_4_bw_4) {
  TransposeMicrokernelTester tester;
  tester.input_stride(4).output_stride(4);
  tester.block_width(4).block_height(4);
  tester.element_size(2).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
}
1166
// Sweeps block_width over [3, 4) -- i.e. only width 3 -- at block_height 4.
// input_stride equals the width; output_stride is fixed at 8.
TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_4_bw_3_4) {
  for (size_t width = 3; width < 4; width++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(8);
    tester.block_width(width).block_height(4);
    tester.element_size(2).iterations(1);
    tester.Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
  }
}
1179
// Sweeps block_width over [3, 4) -- i.e. only width 3 -- at block_height 8.
// input_stride equals the width; output_stride equals block_height.
TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_8_bw_3_4) {
  for (size_t width = 3; width < 4; width++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(8);
    tester.block_width(width).block_height(8);
    tester.element_size(2).iterations(1);
    tester.Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
  }
}
1192
// One block of height 8 and width 2; output_stride (16) is larger than
// block_height while input_stride equals block_width.
TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_8_bw_2) {
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(16);
  tester.block_width(2).block_height(8);
  tester.element_size(2).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
}
1203
// Sweeps block_height over [5, 8) with output_stride equal to the height.
// Note that block_width is fixed at 5 (not 2, as the test name might suggest)
// and input_stride at 19.
TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_5_8_bw_2) {
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(19).output_stride(height);
    tester.block_width(5).block_height(height);
    tester.element_size(2).iterations(1);
    tester.Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
  }
}
1216
// Sweeps block_height over [5, 8) at block_width 4. input_stride equals
// block_width and output_stride equals block_height.
TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_5_8_bw_4) {
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(4).output_stride(height);
    tester.block_width(4).block_height(height);
    tester.element_size(2).iterations(1);
    tester.Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
  }
}
1229
// Sweeps block_height over [5, 8) and block_width over [3, 4) (only width 3).
// input_stride equals block_width and output_stride equals block_height.
TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_5_8_bw_3_4) {
  for (size_t height = 5; height < 8; height++) {
    for (size_t width = 3; width < 4; width++) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width).output_stride(height);
      tester.block_width(width).block_height(height);
      tester.element_size(2).iterations(1);
      tester.Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
    }
  }
}
1244
// One block of height 4 and width 2 where input_stride (4) is larger than
// block_width; output_stride equals block_height.
TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_4_bw_2_is_4) {
  TransposeMicrokernelTester tester;
  tester.input_stride(4).output_stride(4);
  tester.block_width(2).block_height(4);
  tester.element_size(2).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
}
1255
// One block of height 4 and width 2 where output_stride (8) is larger than
// block_height; input_stride equals block_width.
TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_4_bw_2_os_8) {
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(8);
  tester.block_width(2).block_height(4);
  tester.element_size(2).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
}
1266
// One block of height 4 and width 2 where both strides are larger than the
// block dimensions: input_stride 4 and output_stride 8.
TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_4_bw_2_is_4_os_8) {
  TransposeMicrokernelTester tester;
  tester.input_stride(4).output_stride(8);
  tester.block_width(2).block_height(4);
  tester.element_size(2).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
}
1277
// Large block (height 68, width 38) with input_element_stride set to 13.
// Strides equal the block dimensions.
TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_68_bw_38_ies_13) {
  TransposeMicrokernelTester tester;
  tester.input_stride(38).output_stride(68);
  tester.block_width(38).block_height(68);
  tester.element_size(2).input_element_stride(13).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
}
1289
// Block of height 12 and width 10 with output_element_stride set to 13.
// Strides equal the block dimensions.
TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_12_bw_10_oes_13) {
  TransposeMicrokernelTester tester;
  tester.input_stride(10).output_stride(12);
  tester.block_width(10).block_height(12);
  tester.element_size(2).output_element_stride(13).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
}
1301
// Block of height 28 and width 46 with both element strides overridden
// (input 19, output 15). input_stride (51) and output_stride (34) are larger
// than block_width and block_height respectively.
TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_28_bw_46_ies_19_oes_15) {
  TransposeMicrokernelTester tester;
  tester.input_stride(51).output_stride(34);
  tester.block_width(46).block_height(28);
  tester.element_size(2).input_element_stride(19).output_element_stride(15).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
}
1314
// One 4x4 block; input_stride and output_stride (both 8) are twice the block
// dimensions.
TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4) {
  TransposeMicrokernelTester tester;
  tester.input_stride(8).output_stride(8);
  tester.block_width(4).block_height(4);
  tester.element_size(2).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
}
1325
// Full sweep of block_height 1..8 and block_width 1..8 (both inclusive), with
// input_stride = 3 * width and output_stride = 7 * height.
TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_1_8_bw_1_8) {
  for (size_t height = 1; height <= 8; height++) {
    for (size_t width = 1; width <= 8; width++) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width * 3).output_stride(height * 7);
      tester.block_width(width).block_height(height);
      tester.element_size(2).iterations(1);
      tester.Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
    }
  }
}
1340
// One block of height 4 and width 8; both strides equal the block dimensions.
TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_8) {
  TransposeMicrokernelTester tester;
  tester.input_stride(8).output_stride(4);
  tester.block_width(8).block_height(4);
  tester.element_size(2).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
}
1351
// Sweeps block_width over [5, 8) at block_height 4. input_stride equals the
// width; output_stride is fixed at 8.
TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_5_8) {
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(8);
    tester.block_width(width).block_height(4);
    tester.element_size(2).iterations(1);
    tester.Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
  }
}
1364
// Sweeps block_width over [5, 8) at block_height 8. input_stride equals the
// width; output_stride equals block_height.
TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_8_bw_5_8) {
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(8);
    tester.block_width(width).block_height(8);
    tester.element_size(2).iterations(1);
    tester.Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
  }
}
1377
// One block of height 8 and width 4; output_stride (16) is larger than
// block_height while input_stride equals block_width.
TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_8_bw_4) {
  TransposeMicrokernelTester tester;
  tester.input_stride(4).output_stride(16);
  tester.block_width(4).block_height(8);
  tester.element_size(2).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
}
1388
// Sweeps block_height over [5, 8) with output_stride equal to the height.
// Note that block_width is fixed at 7 (not 4, as the test name might suggest)
// and input_stride at 21.
TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_5_8_bw_4) {
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(21).output_stride(height);
    tester.block_width(7).block_height(height);
    tester.element_size(2).iterations(1);
    tester.Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
  }
}
1401
// Sweeps block_height over [5, 8) at block_width 8. input_stride equals
// block_width and output_stride equals block_height.
TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_5_8_bw_8) {
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(8).output_stride(height);
    tester.block_width(8).block_height(height);
    tester.element_size(2).iterations(1);
    tester.Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
  }
}
1414
// Sweeps block_height and block_width independently over [5, 8).
// input_stride equals block_width and output_stride equals block_height.
TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_5_8_bw_5_8) {
  for (size_t height = 5; height < 8; height++) {
    for (size_t width = 5; width < 8; width++) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width).output_stride(height);
      tester.block_width(width).block_height(height);
      tester.element_size(2).iterations(1);
      tester.Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
    }
  }
}
1429
// One 4x4 block where input_stride (8) is larger than block_width;
// output_stride equals block_height.
TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_is_8) {
  TransposeMicrokernelTester tester;
  tester.input_stride(8).output_stride(4);
  tester.block_width(4).block_height(4);
  tester.element_size(2).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
}
1440
// One 4x4 block where output_stride (8) is larger than block_height;
// input_stride equals block_width.
TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_os_8) {
  TransposeMicrokernelTester tester;
  tester.input_stride(4).output_stride(8);
  tester.block_width(4).block_height(4);
  tester.element_size(2).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
}
1451
// One 4x4 block where both strides (8) are larger than the block dimensions.
TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_is_8_os_8) {
  TransposeMicrokernelTester tester;
  tester.input_stride(8).output_stride(8);
  tester.block_width(4).block_height(4);
  tester.element_size(2).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
}
1462
// Large block (height 68, width 76) with input_element_stride set to 13.
// Strides equal the block dimensions.
TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_68_bw_76_ies_13) {
  TransposeMicrokernelTester tester;
  tester.input_stride(76).output_stride(68);
  tester.block_width(76).block_height(68);
  tester.element_size(2).input_element_stride(13).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
}
1474
// Block of height 12 and width 20 with output_element_stride set to 13.
// Strides equal the block dimensions.
TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_12_bw_20_oes_13) {
  TransposeMicrokernelTester tester;
  tester.input_stride(20).output_stride(12);
  tester.block_width(20).block_height(12);
  tester.element_size(2).output_element_stride(13).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
}
1486
// Block of height 28 and width 92 with both element strides overridden
// (input 19, output 15). input_stride (97) and output_stride (34) are larger
// than block_width and block_height respectively.
TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_28_bw_92_ies_19_oes_15) {
  TransposeMicrokernelTester tester;
  tester.input_stride(97).output_stride(34);
  tester.block_width(92).block_height(28);
  tester.element_size(2).input_element_stride(19).output_element_stride(15).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
}
1499
1500 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One block of height 4 and width 8; input_stride (16) is twice block_width
// and output_stride (8) is twice block_height.
TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_4_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(16).output_stride(8);
  tester.block_width(8).block_height(4);
  tester.element_size(2).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__4x8_sse2);
}
1512
// Full sweep of block_height 1..8 and block_width 1..16 (both inclusive),
// with input_stride = 3 * width and output_stride = 7 * height.
TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_1_8_bw_1_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 8; height++) {
    for (size_t width = 1; width <= 16; width++) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width * 3).output_stride(height * 7);
      tester.block_width(width).block_height(height);
      tester.element_size(2).iterations(1);
      tester.Test(xnn_x16_transposec_ukernel__4x8_sse2);
    }
  }
}
1528
// One block of height 4 and width 16; both strides equal the block dimensions.
TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_4_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(16).output_stride(4);
  tester.block_width(16).block_height(4);
  tester.element_size(2).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__4x8_sse2);
}
1540
// Sweeps block_width over [9, 16) at block_height 4. input_stride equals the
// width; output_stride is fixed at 8.
TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_4_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 9; width < 16; width++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(8);
    tester.block_width(width).block_height(4);
    tester.element_size(2).iterations(1);
    tester.Test(xnn_x16_transposec_ukernel__4x8_sse2);
  }
}
1554
// Sweeps block_width over [9, 16) at block_height 8. input_stride equals the
// width; output_stride equals block_height.
TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_8_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 9; width < 16; width++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(8);
    tester.block_width(width).block_height(8);
    tester.element_size(2).iterations(1);
    tester.Test(xnn_x16_transposec_ukernel__4x8_sse2);
  }
}
1568
// One 8x8 block; output_stride (16) is larger than block_height while
// input_stride equals block_width.
TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(8).output_stride(16);
  tester.block_width(8).block_height(8);
  tester.element_size(2).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__4x8_sse2);
}
1580
// Sweeps block_height over [5, 8) with output_stride equal to the height.
// Note that block_width is fixed at 11 (not 8, as the test name might suggest)
// and input_stride at 25.
TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_5_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(25).output_stride(height);
    tester.block_width(11).block_height(height);
    tester.element_size(2).iterations(1);
    tester.Test(xnn_x16_transposec_ukernel__4x8_sse2);
  }
}
1594
// Sweeps block_height over [5, 8) at block_width 16. input_stride equals
// block_width and output_stride equals block_height.
TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_5_8_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(16).output_stride(height);
    tester.block_width(16).block_height(height);
    tester.element_size(2).iterations(1);
    tester.Test(xnn_x16_transposec_ukernel__4x8_sse2);
  }
}
1608
// Sweeps block_height over [5, 8) and block_width over [9, 16).
// input_stride equals block_width and output_stride equals block_height.
TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_5_8_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 5; height < 8; height++) {
    for (size_t width = 9; width < 16; width++) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width).output_stride(height);
      tester.block_width(width).block_height(height);
      tester.element_size(2).iterations(1);
      tester.Test(xnn_x16_transposec_ukernel__4x8_sse2);
    }
  }
}
1624
// One block of height 4 and width 8 where input_stride (16) is larger than
// block_width; output_stride equals block_height.
TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_4_bw_8_is_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(16).output_stride(4);
  tester.block_width(8).block_height(4);
  tester.element_size(2).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__4x8_sse2);
}
1636
// One block of height 4 and width 8 where output_stride (8) is larger than
// block_height; input_stride equals block_width.
TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_4_bw_8_os_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(8).output_stride(8);
  tester.block_width(8).block_height(4);
  tester.element_size(2).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__4x8_sse2);
}
1648
// One block of height 4 and width 8 where both strides are larger than the
// block dimensions: input_stride 16 and output_stride 8.
TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_4_bw_8_is_16_os_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(16).output_stride(8);
  tester.block_width(8).block_height(4);
  tester.element_size(2).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__4x8_sse2);
}
1660
// Large block (height 68, width 152) with input_element_stride set to 13.
// Strides equal the block dimensions.
TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_68_bw_152_ies_13) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(152).output_stride(68);
  tester.block_width(152).block_height(68);
  tester.element_size(2).input_element_stride(13).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__4x8_sse2);
}
1673
// Block of height 12 and width 40 with output_element_stride set to 13.
// Strides equal the block dimensions.
TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_12_bw_40_oes_13) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(40).output_stride(12);
  tester.block_width(40).block_height(12);
  tester.element_size(2).output_element_stride(13).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__4x8_sse2);
}
1686
// Block of height 28 and width 184 with both element strides overridden
// (input 19, output 15). input_stride (189) and output_stride (34) are larger
// than block_width and block_height respectively.
TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_28_bw_184_ies_19_oes_15) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(189).output_stride(34);
  tester.block_width(184).block_height(28);
  tester.element_size(2).input_element_stride(19).output_element_stride(15).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__4x8_sse2);
}
1700 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1701
1702
1703 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One 8x8 block; input_stride and output_stride (both 16) are twice the block
// dimensions.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(16).output_stride(16);
  tester.block_width(8).block_height(8);
  tester.element_size(2).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
}
1715
// Full sweep of block_height 1..16 and block_width 1..16 (both inclusive),
// with input_stride = 3 * width and output_stride = 7 * height.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_1_16_bw_1_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 16; height++) {
    for (size_t width = 1; width <= 16; width++) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width * 3).output_stride(height * 7);
      tester.block_width(width).block_height(height);
      tester.element_size(2).iterations(1);
      tester.Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
    }
  }
}
1731
// One block of height 8 and width 16; both strides equal the block dimensions.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_8_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(16).output_stride(8);
  tester.block_width(16).block_height(8);
  tester.element_size(2).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
}
1743
// Sweeps block_width over [9, 16) at block_height 8. input_stride equals the
// width; output_stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_8_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 9; width < 16; width++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(16);
    tester.block_width(width).block_height(8);
    tester.element_size(2).iterations(1);
    tester.Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
  }
}
1757
// Sweeps block_width over [9, 16) at block_height 16. input_stride equals the
// width; output_stride equals block_height.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_16_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 9; width < 16; width++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(16);
    tester.block_width(width).block_height(16);
    tester.element_size(2).iterations(1);
    tester.Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
  }
}
1771
// One block of height 16 and width 8; output_stride (28) is larger than
// block_height while input_stride equals block_width.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_16_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(8).output_stride(28);
  tester.block_width(8).block_height(16);
  tester.element_size(2).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
}
1783
// Sweeps block_height over [9, 16) with output_stride equal to the height.
// Note that block_width is fixed at 11 (not 8, as the test name might suggest)
// and input_stride at 25.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_9_16_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 9; height < 16; height++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(25).output_stride(height);
    tester.block_width(11).block_height(height);
    tester.element_size(2).iterations(1);
    tester.Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
  }
}
1797
// Sweeps block_height over [9, 16) at block_width 16. input_stride equals
// block_width and output_stride equals block_height.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_9_16_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 9; height < 16; height++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(16).output_stride(height);
    tester.block_width(16).block_height(height);
    tester.element_size(2).iterations(1);
    tester.Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
  }
}
1811
// Sweeps block_height and block_width independently over [9, 16).
// input_stride equals block_width and output_stride equals block_height.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_9_16_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 9; height < 16; height++) {
    for (size_t width = 9; width < 16; width++) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width).output_stride(height);
      tester.block_width(width).block_height(height);
      tester.element_size(2).iterations(1);
      tester.Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
    }
  }
}
1827
// One 8x8 block where input_stride (16) is larger than block_width;
// output_stride equals block_height.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_8_bw_8_is_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(16).output_stride(8);
  tester.block_width(8).block_height(8);
  tester.element_size(2).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
}
1839
// One 8x8 block where output_stride (16) is larger than block_height;
// input_stride equals block_width.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_8_bw_8_os_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(8).output_stride(16);
  tester.block_width(8).block_height(8);
  tester.element_size(2).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
}
1851
// One 8x8 block where both strides (16) are larger than the block dimensions.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(16).output_stride(16);
  tester.block_width(8).block_height(8);
  tester.element_size(2).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
}
1863
// Large block (height 136, width 152) with input_element_stride set to 13.
// Strides equal the block dimensions.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_136_bw_152_ies_13) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(152).output_stride(136);
  tester.block_width(152).block_height(136);
  tester.element_size(2).input_element_stride(13).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
}
1876
// Block of height 24 and width 40 with output_element_stride set to 13.
// Strides equal the block dimensions.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_24_bw_40_oes_13) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(40).output_stride(24);
  tester.block_width(40).block_height(24);
  tester.element_size(2).output_element_stride(13).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
}
1889
// Block of height 56 and width 184 with both element strides overridden
// (input 19, output 15). input_stride (189) and output_stride (62) are larger
// than block_width and block_height respectively.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_56_bw_184_ies_19_oes_15) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(189).output_stride(62);
  tester.block_width(184).block_height(56);
  tester.element_size(2).input_element_stride(19).output_element_stride(15).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
}
1903 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1904
1905
1906 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One 8x8 block; input_stride and output_stride (both 16) are twice the block
// dimensions.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(16).output_stride(16);
  tester.block_width(8).block_height(8);
  tester.element_size(2).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
}
1918
// Full sweep of block_height 1..16 and block_width 1..16 (both inclusive),
// with input_stride = 3 * width and output_stride = 7 * height.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_1_16_bw_1_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 16; height++) {
    for (size_t width = 1; width <= 16; width++) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width * 3).output_stride(height * 7);
      tester.block_width(width).block_height(height);
      tester.element_size(2).iterations(1);
      tester.Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
    }
  }
}
1934
// One block of height 8 and width 16; both strides equal the block dimensions.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_8_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(16).output_stride(8);
  tester.block_width(16).block_height(8);
  tester.element_size(2).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
}
1946
// Sweeps block_width over [9, 16) at block_height 8. input_stride equals the
// width; output_stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_8_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 9; width < 16; width++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(16);
    tester.block_width(width).block_height(8);
    tester.element_size(2).iterations(1);
    tester.Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
  }
}
1960
// Sweeps block_width over [9, 16) at block_height 16. input_stride equals the
// width; output_stride equals block_height.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_16_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 9; width < 16; width++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(16);
    tester.block_width(width).block_height(16);
    tester.element_size(2).iterations(1);
    tester.Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
  }
}
1974
// One block of height 16 and width 8; output_stride (28) is larger than
// block_height while input_stride equals block_width.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_16_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(8).output_stride(28);
  tester.block_width(8).block_height(16);
  tester.element_size(2).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
}
1986
// Sweeps block_height over [9, 16) with output_stride equal to the height.
// Note that block_width is fixed at 11 (not 8, as the test name might suggest)
// and input_stride at 25.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_9_16_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 9; height < 16; height++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(25).output_stride(height);
    tester.block_width(11).block_height(height);
    tester.element_size(2).iterations(1);
    tester.Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
  }
}
2000
// Sweeps block_height over [9, 16) at block_width 16. input_stride equals
// block_width and output_stride equals block_height.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_9_16_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 9; height < 16; height++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(16).output_stride(height);
    tester.block_width(16).block_height(height);
    tester.element_size(2).iterations(1);
    tester.Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
  }
}
2014
// Sweeps block_height and block_width independently over [9, 16).
// input_stride equals block_width and output_stride equals block_height.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_9_16_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 9; height < 16; height++) {
    for (size_t width = 9; width < 16; width++) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width).output_stride(height);
      tester.block_width(width).block_height(height);
      tester.element_size(2).iterations(1);
      tester.Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
    }
  }
}
2030
// One 8x8 block where input_stride (16) is larger than block_width;
// output_stride equals block_height.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_8_bw_8_is_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(16).output_stride(8);
  tester.block_width(8).block_height(8);
  tester.element_size(2).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
}
2042
// One 8x8 block where output_stride (16) is larger than block_height;
// input_stride equals block_width.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_8_bw_8_os_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(8).output_stride(16);
  tester.block_width(8).block_height(8);
  tester.element_size(2).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
}
2054
// One 8x8 block where both strides (16) are larger than the block dimensions.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(16).output_stride(16);
  tester.block_width(8).block_height(8);
  tester.element_size(2).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
}
2066
// Large block (height 136, width 152) with input_element_stride set to 13.
// Strides equal the block dimensions.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_136_bw_152_ies_13) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(152).output_stride(136);
  tester.block_width(152).block_height(136);
  tester.element_size(2).input_element_stride(13).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
}
2079
// Block of height 24 and width 40 with output_element_stride set to 13.
// Strides equal the block dimensions.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_24_bw_40_oes_13) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(40).output_stride(24);
  tester.block_width(40).block_height(24);
  tester.element_size(2).output_element_stride(13).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
}
2092
// Block of height 56 and width 184 with both element strides overridden
// (input 19, output 15). input_stride (189) and output_stride (62) are larger
// than block_width and block_height respectively.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_56_bw_184_ies_19_oes_15) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(189).output_stride(62);
  tester.block_width(184).block_height(56);
  tester.element_size(2).input_element_stride(19).output_element_stride(15).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
}
2106 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2107
2108
2109 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One 8x8 block; input_stride and output_stride (both 16) are twice the block
// dimensions.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(16).output_stride(16);
  tester.block_width(8).block_height(8);
  tester.element_size(2).iterations(1);
  tester.Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
}
2121
// Sweeps block_height and block_width over 1..16 inclusive; input_stride is
// 3x the width and output_stride is 7x the height.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_1_16_bw_1_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(3 * width).output_stride(7 * height)
          .block_width(width).block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
    }
  }
}
2137
// 16-wide by 8-high block with strides equal to the block dimensions.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_8_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(16).block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
}
2149
// Sweeps block_width over 9..15 (the `<` bound excludes 16) at block_height 8;
// input_stride tracks the width and output_stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_8_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(16)
        .block_width(width).block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
  }
}
2163
// Sweeps block_width over 9..15 (the `<` bound excludes 16) at block_height 16;
// input_stride tracks the width and output_stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_16_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(16)
        .block_width(width).block_height(16)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
  }
}
2177
// 8-wide by 16-high block; input_stride is 8 and output_stride is 28.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_16_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(28)
      .block_width(8).block_height(16)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
}
2189
// Sweeps block_height over 9..15 (the `<` bound excludes 16); output_stride
// tracks the height while input_stride is fixed at 25.
// NOTE(review): the test name says bw_8 but block_width is set to 11 --
// confirm against the generator specification.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_9_16_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(25).output_stride(height)
        .block_width(11).block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
  }
}
2203
// Sweeps block_height over 9..15 (the `<` bound excludes 16) at block_width 16;
// output_stride tracks the height and input_stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_9_16_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(height)
        .block_width(16).block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
  }
}
2217
// Sweeps block_height and block_width over 9..15 (the `<` bounds exclude 16);
// each stride equals the matching block dimension.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_9_16_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
    }
  }
}
2233
// One 8x8 block with input_stride 16 and output_stride 8.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_8_bw_8_is_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(8).block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
}
2245
// One 8x8 block with input_stride 8 and output_stride 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_8_bw_8_os_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
}
2257
// One 8x8 block with input_stride and output_stride both set to 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
}
2269
// 152-wide by 136-high block with strides equal to the block dimensions and
// input_element_stride set to 13.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_136_bw_152_ies_13) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(152).output_stride(136)
      .block_width(152).block_height(136)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
}
2282
// 40-wide by 24-high block with strides equal to the block dimensions and
// output_element_stride set to 13.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_24_bw_40_oes_13) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(40).output_stride(24)
      .block_width(40).block_height(24)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
}
2295
// 184-wide by 56-high block; input_stride 189 and output_stride 62 exceed the
// block dimensions, with input/output element strides of 19 and 15.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_56_bw_184_ies_19_oes_15) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(189).output_stride(62)
      .block_width(184).block_height(56)
      .element_size(2)
      .input_element_stride(19).output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
}
2309 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2310
2311
2312 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One 8x8 block with input_stride and output_stride both set to 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
}
2324
// Sweeps block_height and block_width over 1..16 inclusive; input_stride is
// 3x the width and output_stride is 7x the height.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_1_16_bw_1_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(3 * width).output_stride(7 * height)
          .block_width(width).block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
    }
  }
}
2340
// 16-wide by 8-high block with strides equal to the block dimensions.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_8_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(16).block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
}
2352
// Sweeps block_width over 9..15 (the `<` bound excludes 16) at block_height 8;
// input_stride tracks the width and output_stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_8_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(16)
        .block_width(width).block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
  }
}
2366
// Sweeps block_width over 9..15 (the `<` bound excludes 16) at block_height 16;
// input_stride tracks the width and output_stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_16_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(16)
        .block_width(width).block_height(16)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
  }
}
2380
// 8-wide by 16-high block; input_stride is 8 and output_stride is 28.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_16_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(28)
      .block_width(8).block_height(16)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
}
2392
// Sweeps block_height over 9..15 (the `<` bound excludes 16); output_stride
// tracks the height while input_stride is fixed at 25.
// NOTE(review): the test name says bw_8 but block_width is set to 11 --
// confirm against the generator specification.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_9_16_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(25).output_stride(height)
        .block_width(11).block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
  }
}
2406
// Sweeps block_height over 9..15 (the `<` bound excludes 16) at block_width 16;
// output_stride tracks the height and input_stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_9_16_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(height)
        .block_width(16).block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
  }
}
2420
// Sweeps block_height and block_width over 9..15 (the `<` bounds exclude 16);
// each stride equals the matching block dimension.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_9_16_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
    }
  }
}
2436
// One 8x8 block with input_stride 16 and output_stride 8.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_8_bw_8_is_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(8).block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
}
2448
// One 8x8 block with input_stride 8 and output_stride 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_8_bw_8_os_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
}
2460
// One 8x8 block with input_stride and output_stride both set to 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
}
2472
// 152-wide by 136-high block with strides equal to the block dimensions and
// input_element_stride set to 13.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_136_bw_152_ies_13) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(152).output_stride(136)
      .block_width(152).block_height(136)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
}
2485
// 40-wide by 24-high block with strides equal to the block dimensions and
// output_element_stride set to 13.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_24_bw_40_oes_13) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(40).output_stride(24)
      .block_width(40).block_height(24)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
}
2498
// 184-wide by 56-high block; input_stride 189 and output_stride 62 exceed the
// block dimensions, with input/output element strides of 19 and 15.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_56_bw_184_ies_19_oes_15) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(189).output_stride(62)
      .block_width(184).block_height(56)
      .element_size(2)
      .input_element_stride(19).output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
}
2512 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2513
2514
2515 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One 8x8 block with input_stride and output_stride both set to 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
}
2527
// Sweeps block_height and block_width over 1..16 inclusive; input_stride is
// 3x the width and output_stride is 7x the height.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_1_16_bw_1_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(3 * width).output_stride(7 * height)
          .block_width(width).block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
    }
  }
}
2543
// 16-wide by 8-high block with strides equal to the block dimensions.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_8_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(16).block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
}
2555
// Sweeps block_width over 9..15 (the `<` bound excludes 16) at block_height 8;
// input_stride tracks the width and output_stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_8_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(16)
        .block_width(width).block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
  }
}
2569
// Sweeps block_width over 9..15 (the `<` bound excludes 16) at block_height 16;
// input_stride tracks the width and output_stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_16_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(16)
        .block_width(width).block_height(16)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
  }
}
2583
// 8-wide by 16-high block; input_stride is 8 and output_stride is 28.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_16_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(28)
      .block_width(8).block_height(16)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
}
2595
// Sweeps block_height over 9..15 (the `<` bound excludes 16); output_stride
// tracks the height while input_stride is fixed at 25.
// NOTE(review): the test name says bw_8 but block_width is set to 11 --
// confirm against the generator specification.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_9_16_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(25).output_stride(height)
        .block_width(11).block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
  }
}
2609
// Sweeps block_height over 9..15 (the `<` bound excludes 16) at block_width 16;
// output_stride tracks the height and input_stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_9_16_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(height)
        .block_width(16).block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
  }
}
2623
// Sweeps block_height and block_width over 9..15 (the `<` bounds exclude 16);
// each stride equals the matching block dimension.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_9_16_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
    }
  }
}
2639
// One 8x8 block with input_stride 16 and output_stride 8.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_8_bw_8_is_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(8).block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
}
2651
// One 8x8 block with input_stride 8 and output_stride 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_8_bw_8_os_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
}
2663
// One 8x8 block with input_stride and output_stride both set to 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
}
2675
// 152-wide by 136-high block with strides equal to the block dimensions and
// input_element_stride set to 13.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_136_bw_152_ies_13) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(152).output_stride(136)
      .block_width(152).block_height(136)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
}
2688
// 40-wide by 24-high block with strides equal to the block dimensions and
// output_element_stride set to 13.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_24_bw_40_oes_13) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(40).output_stride(24)
      .block_width(40).block_height(24)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
}
2701
// 184-wide by 56-high block; input_stride 189 and output_stride 62 exceed the
// block dimensions, with input/output element strides of 19 and 15.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_56_bw_184_ies_19_oes_15) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(189).output_stride(62)
      .block_width(184).block_height(56)
      .element_size(2)
      .input_element_stride(19).output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
}
2715 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2716
2717
2718 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// One 8x8 block with input_stride and output_stride both set to 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_8_bw_8) {
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
}
2729
// Sweeps block_height and block_width over 1..16 inclusive; input_stride is
// 3x the width and output_stride is 7x the height.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_1_16_bw_1_16) {
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(3 * width).output_stride(7 * height)
          .block_width(width).block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
    }
  }
}
2744
// 16-wide by 8-high block with strides equal to the block dimensions.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_8_bw_16) {
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(16).block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
}
2755
// Sweeps block_width over 9..15 (the `<` bound excludes 16) at block_height 8;
// input_stride tracks the width and output_stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_8_bw_9_16) {
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(16)
        .block_width(width).block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
  }
}
2768
// Sweeps block_width over 9..15 (the `<` bound excludes 16) at block_height 16;
// input_stride tracks the width and output_stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_16_bw_9_16) {
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(16)
        .block_width(width).block_height(16)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
  }
}
2781
// 8-wide by 16-high block; input_stride is 8 and output_stride is 28.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_16_bw_8) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(28)
      .block_width(8).block_height(16)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
}
2792
// Sweeps block_height over 9..15 (the `<` bound excludes 16); output_stride
// tracks the height while input_stride is fixed at 25.
// NOTE(review): the test name says bw_8 but block_width is set to 11 --
// confirm against the generator specification.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_9_16_bw_8) {
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(25).output_stride(height)
        .block_width(11).block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
  }
}
2805
// Sweeps block_height over 9..15 (the `<` bound excludes 16) at block_width 16;
// output_stride tracks the height and input_stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_9_16_bw_16) {
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(height)
        .block_width(16).block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
  }
}
2818
// Sweeps block_height and block_width over 9..15 (the `<` bounds exclude 16);
// each stride equals the matching block dimension.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_9_16_bw_9_16) {
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
    }
  }
}
2833
// One 8x8 block with input_stride 16 and output_stride 8.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_8_bw_8_is_16) {
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(8).block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
}
2844
// One 8x8 block with input_stride 8 and output_stride 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_8_bw_8_os_16) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
}
2855
// One 8x8 block with input_stride and output_stride both set to 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_8_bw_8_is_16_os_16) {
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
}
2866
// 152-wide by 136-high block with strides equal to the block dimensions and
// input_element_stride set to 13.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_136_bw_152_ies_13) {
  TransposeMicrokernelTester()
      .input_stride(152).output_stride(136)
      .block_width(152).block_height(136)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
}
2878
// 40-wide by 24-high block with strides equal to the block dimensions and
// output_element_stride set to 13.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_24_bw_40_oes_13) {
  TransposeMicrokernelTester()
      .input_stride(40).output_stride(24)
      .block_width(40).block_height(24)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
}
2890
// 184-wide by 56-high block; input_stride 189 and output_stride 62 exceed the
// block dimensions, with input/output element strides of 19 and 15.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_56_bw_184_ies_19_oes_15) {
  TransposeMicrokernelTester()
      .input_stride(189).output_stride(62)
      .block_width(184).block_height(56)
      .element_size(2)
      .input_element_stride(19).output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
}
2903 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2904
2905
2906 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// One 8x8 block with input_stride and output_stride both set to 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_8_bw_8) {
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
}
2917
// Sweeps block_height and block_width over 1..16 inclusive; input_stride is
// 3x the width and output_stride is 7x the height.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_1_16_bw_1_16) {
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(3 * width).output_stride(7 * height)
          .block_width(width).block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
    }
  }
}
2932
// 16-wide by 8-high block with strides equal to the block dimensions.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_8_bw_16) {
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(16).block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
}
2943
// Sweeps block_width over 9..15 (the `<` bound excludes 16) at block_height 8;
// input_stride tracks the width and output_stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_8_bw_9_16) {
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(16)
        .block_width(width).block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
  }
}
2956
// Sweeps block_width over 9..15 (the `<` bound excludes 16) at block_height 16;
// input_stride tracks the width and output_stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_16_bw_9_16) {
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(16)
        .block_width(width).block_height(16)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
  }
}
2969
// 8-wide by 16-high block; input_stride is 8 and output_stride is 28.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_16_bw_8) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(28)
      .block_width(8).block_height(16)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
}
2980
// Sweeps block_height over 9..15 (the `<` bound excludes 16); output_stride
// tracks the height while input_stride is fixed at 25.
// NOTE(review): the test name says bw_8 but block_width is set to 11 --
// confirm against the generator specification.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_9_16_bw_8) {
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(25).output_stride(height)
        .block_width(11).block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
  }
}
2993
// Sweeps block_height over 9..15 (the `<` bound excludes 16) at block_width 16;
// output_stride tracks the height and input_stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_9_16_bw_16) {
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(height)
        .block_width(16).block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
  }
}
3006
// Sweeps block_height and block_width over 9..15 (the `<` bounds exclude 16);
// each stride equals the matching block dimension.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_9_16_bw_9_16) {
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
    }
  }
}
3021
// One 8x8 block with input_stride 16 and output_stride 8.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_8_bw_8_is_16) {
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(8).block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
}
3032
// One 8x8 block with input_stride 8 and output_stride 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_8_bw_8_os_16) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
}
3043
// One 8x8 block with input_stride and output_stride both set to 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_8_bw_8_is_16_os_16) {
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
}
3054
// 152-wide by 136-high block with strides equal to the block dimensions and
// input_element_stride set to 13.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_136_bw_152_ies_13) {
  TransposeMicrokernelTester()
      .input_stride(152).output_stride(136)
      .block_width(152).block_height(136)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
}
3066
// 40-wide by 24-high block with strides equal to the block dimensions and
// output_element_stride set to 13.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_24_bw_40_oes_13) {
  TransposeMicrokernelTester()
      .input_stride(40).output_stride(24)
      .block_width(40).block_height(24)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
}
3078
// 184-wide by 56-high block; input_stride 189 and output_stride 62 exceed the
// block dimensions, with input/output element strides of 19 and 15.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_56_bw_184_ies_19_oes_15) {
  TransposeMicrokernelTester()
      .input_stride(189).output_stride(62)
      .block_width(184).block_height(56)
      .element_size(2)
      .input_element_stride(19).output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
}
3091 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3092
3093
3094 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single 8x8 block with input_stride 16 and output_stride 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_8_bw_8) {
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
}
3105
// Every block_height/block_width pair in 1..16, with
// input_stride = 3 * width and output_stride = 7 * height.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_1_16_bw_1_16) {
  for (size_t bh = 1; bh <= 16; ++bh) {
    for (size_t bw = 1; bw <= 16; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw * 3)
          .output_stride(bh * 7)
          .block_width(bw)
          .block_height(bh)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
    }
  }
}
3120
// Block of height 8 and width 16; input_stride 16, output_stride 8.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_8_bw_16) {
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(16)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
}
3131
// block_height 8 with block_width swept over 9..15; input_stride equals
// the width and output_stride is 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_8_bw_9_16) {
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(16)
        .block_width(bw)
        .block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
  }
}
3144
// block_height 16 with block_width swept over 9..15; input_stride equals
// the width and output_stride is 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_16_bw_9_16) {
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(16)
        .block_width(bw)
        .block_height(16)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
  }
}
3157
// Block of height 16 and width 8; input_stride 8, output_stride 28.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_16_bw_8) {
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(28)
      .block_width(8)
      .block_height(16)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
}
3168
// block_height swept over 9..15 (also used as output_stride) with
// block_width fixed at 11 and input_stride 25.
// NOTE(review): the test name says bw_8 but block_width is 11 - presumably
// inherited from the generator template; confirm.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_9_16_bw_8) {
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(25)
        .output_stride(bh)
        .block_width(11)
        .block_height(bh)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
  }
}
3181
// block_height swept over 9..15 (also used as output_stride);
// block_width and input_stride are both 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_9_16_bw_16) {
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(16)
        .output_stride(bh)
        .block_width(16)
        .block_height(bh)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
  }
}
3194
// All block_height/block_width pairs in 9..15; each stride equals the
// matching block dimension.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_9_16_bw_9_16) {
  for (size_t bh = 9; bh < 16; ++bh) {
    for (size_t bw = 9; bw < 16; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw)
          .output_stride(bh)
          .block_width(bw)
          .block_height(bh)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
    }
  }
}
3209
// 8x8 block with input_stride 16 and output_stride 8.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_8_bw_8_is_16) {
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
}
3220
// 8x8 block with input_stride 8 and output_stride 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_8_bw_8_os_16) {
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
}
3231
// 8x8 block with input_stride 16 and output_stride 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_8_bw_8_is_16_os_16) {
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
}
3242
// 136x152 (height x width) block with input_element_stride 13.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_136_bw_152_ies_13) {
  TransposeMicrokernelTester()
      .input_stride(152)
      .output_stride(136)
      .block_width(152)
      .block_height(136)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
}
3254
// 24x40 (height x width) block with output_element_stride 13.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_24_bw_40_oes_13) {
  TransposeMicrokernelTester()
      .input_stride(40)
      .output_stride(24)
      .block_width(40)
      .block_height(24)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
}
3266
// 56x184 (height x width) block; input_stride 189, output_stride 62,
// input_element_stride 19 and output_element_stride 15.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_56_bw_184_ies_19_oes_15) {
  TransposeMicrokernelTester()
      .input_stride(189)
      .output_stride(62)
      .block_width(184)
      .block_height(56)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
}
3279 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3280
3281
3282 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single 8x8 block with input_stride 16 and output_stride 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_8_bw_8) {
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
}
3293
// Every block_height/block_width pair in 1..16, with
// input_stride = 3 * width and output_stride = 7 * height.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_1_16_bw_1_16) {
  for (size_t bh = 1; bh <= 16; ++bh) {
    for (size_t bw = 1; bw <= 16; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw * 3)
          .output_stride(bh * 7)
          .block_width(bw)
          .block_height(bh)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
    }
  }
}
3308
// Block of height 8 and width 16; input_stride 16, output_stride 8.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_8_bw_16) {
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(16)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
}
3319
// block_height 8 with block_width swept over 9..15; input_stride equals
// the width and output_stride is 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_8_bw_9_16) {
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(16)
        .block_width(bw)
        .block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
  }
}
3332
// block_height 16 with block_width swept over 9..15; input_stride equals
// the width and output_stride is 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_16_bw_9_16) {
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(16)
        .block_width(bw)
        .block_height(16)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
  }
}
3345
// Block of height 16 and width 8; input_stride 8, output_stride 28.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_16_bw_8) {
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(28)
      .block_width(8)
      .block_height(16)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
}
3356
// block_height swept over 9..15 (also used as output_stride) with
// block_width fixed at 11 and input_stride 25.
// NOTE(review): the test name says bw_8 but block_width is 11 - presumably
// inherited from the generator template; confirm.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_9_16_bw_8) {
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(25)
        .output_stride(bh)
        .block_width(11)
        .block_height(bh)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
  }
}
3369
// block_height swept over 9..15 (also used as output_stride);
// block_width and input_stride are both 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_9_16_bw_16) {
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(16)
        .output_stride(bh)
        .block_width(16)
        .block_height(bh)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
  }
}
3382
// All block_height/block_width pairs in 9..15; each stride equals the
// matching block dimension.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_9_16_bw_9_16) {
  for (size_t bh = 9; bh < 16; ++bh) {
    for (size_t bw = 9; bw < 16; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw)
          .output_stride(bh)
          .block_width(bw)
          .block_height(bh)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
    }
  }
}
3397
// 8x8 block with input_stride 16 and output_stride 8.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_8_bw_8_is_16) {
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
}
3408
// 8x8 block with input_stride 8 and output_stride 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_8_bw_8_os_16) {
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
}
3419
// 8x8 block with input_stride 16 and output_stride 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_8_bw_8_is_16_os_16) {
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
}
3430
// 136x152 (height x width) block with input_element_stride 13.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_136_bw_152_ies_13) {
  TransposeMicrokernelTester()
      .input_stride(152)
      .output_stride(136)
      .block_width(152)
      .block_height(136)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
}
3442
// 24x40 (height x width) block with output_element_stride 13.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_24_bw_40_oes_13) {
  TransposeMicrokernelTester()
      .input_stride(40)
      .output_stride(24)
      .block_width(40)
      .block_height(24)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
}
3454
// 56x184 (height x width) block; input_stride 189, output_stride 62,
// input_element_stride 19 and output_element_stride 15.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_56_bw_184_ies_19_oes_15) {
  TransposeMicrokernelTester()
      .input_stride(189)
      .output_stride(62)
      .block_width(184)
      .block_height(56)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
}
3467 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3468
3469
3470 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 4x4 block with input_stride 8 and output_stride 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
}
3482
// Every block_height/block_width pair in 1..8, with
// input_stride = 3 * width and output_stride = 7 * height.
TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 1; bh <= 8; ++bh) {
    for (size_t bw = 1; bw <= 8; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw * 3)
          .output_stride(bh * 7)
          .block_width(bw)
          .block_height(bh)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
    }
  }
}
3498
// Block of height 4 and width 8; input_stride 8, output_stride 4.
TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(8)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
}
3510
// block_height 4 with block_width swept over 5..7; input_stride equals
// the width and output_stride is 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(8)
        .block_width(bw)
        .block_height(4)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
  }
}
3524
// block_height 8 with block_width swept over 5..7; input_stride equals
// the width and output_stride is 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(8)
        .block_width(bw)
        .block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
  }
}
3538
// Block of height 8 and width 4; input_stride 4, output_stride 16.
TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(16)
      .block_width(4)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
}
3550
// block_height swept over 5..7 (also used as output_stride) with
// block_width fixed at 7 and input_stride 21.
// NOTE(review): the test name says bw_4 but block_width is 7 - presumably
// inherited from the generator template; confirm.
TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh < 8; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(21)
        .output_stride(bh)
        .block_width(7)
        .block_height(bh)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
  }
}
3564
// block_height swept over 5..7 (also used as output_stride);
// block_width and input_stride are both 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh < 8; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(8)
        .output_stride(bh)
        .block_width(8)
        .block_height(bh)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
  }
}
3578
// All block_height/block_width pairs in 5..7; each stride equals the
// matching block dimension.
TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh < 8; ++bh) {
    for (size_t bw = 5; bw < 8; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw)
          .output_stride(bh)
          .block_width(bw)
          .block_height(bh)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
    }
  }
}
3594
// 4x4 block with input_stride 8 and output_stride 4.
TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
}
3606
// 4x4 block with input_stride 4 and output_stride 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
}
3618
// 4x4 block with input_stride 8 and output_stride 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
}
3630
// 68x76 (height x width) block with input_element_stride 13.
TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_68_bw_76_ies_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(76)
      .output_stride(68)
      .block_width(76)
      .block_height(68)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
}
3643
// 12x20 (height x width) block with output_element_stride 13.
TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_12_bw_20_oes_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(20)
      .output_stride(12)
      .block_width(20)
      .block_height(12)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
}
3656
// 28x92 (height x width) block; input_stride 97, output_stride 34,
// input_element_stride 19 and output_element_stride 15.
TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_28_bw_92_ies_19_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(97)
      .output_stride(34)
      .block_width(92)
      .block_height(28)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
}
3670 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3671
3672
3673 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 4x4 block with input_stride 8 and output_stride 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
}
3685
// Every block_height/block_width pair in 1..8, with
// input_stride = 3 * width and output_stride = 7 * height.
TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 1; bh <= 8; ++bh) {
    for (size_t bw = 1; bw <= 8; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw * 3)
          .output_stride(bh * 7)
          .block_width(bw)
          .block_height(bh)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
    }
  }
}
3701
// Block of height 4 and width 8; input_stride 8, output_stride 4.
TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(8)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
}
3713
// block_height 4 with block_width swept over 5..7; input_stride equals
// the width and output_stride is 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(8)
        .block_width(bw)
        .block_height(4)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
  }
}
3727
// block_height 8 with block_width swept over 5..7; input_stride equals
// the width and output_stride is 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(8)
        .block_width(bw)
        .block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
  }
}
3741
// Block of height 8 and width 4; input_stride 4, output_stride 16.
TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(16)
      .block_width(4)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
}
3753
// block_height swept over 5..7 (also used as output_stride) with
// block_width fixed at 7 and input_stride 21.
// NOTE(review): the test name says bw_4 but block_width is 7 - presumably
// inherited from the generator template; confirm.
TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh < 8; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(21)
        .output_stride(bh)
        .block_width(7)
        .block_height(bh)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
  }
}
3767
// block_height swept over 5..7 (also used as output_stride);
// block_width and input_stride are both 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh < 8; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(8)
        .output_stride(bh)
        .block_width(8)
        .block_height(bh)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
  }
}
3781
// All block_height/block_width pairs in 5..7; each stride equals the
// matching block dimension.
TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh < 8; ++bh) {
    for (size_t bw = 5; bw < 8; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw)
          .output_stride(bh)
          .block_width(bw)
          .block_height(bh)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
    }
  }
}
3797
// 4x4 block with input_stride 8 and output_stride 4.
TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
}
3809
// 4x4 block with input_stride 4 and output_stride 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
}
3821
// 4x4 block with input_stride 8 and output_stride 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
}
3833
// 68x76 (height x width) block with input_element_stride 13.
TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_68_bw_76_ies_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(76)
      .output_stride(68)
      .block_width(76)
      .block_height(68)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
}
3846
// 12x20 (height x width) block with output_element_stride 13.
TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_12_bw_20_oes_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(20)
      .output_stride(12)
      .block_width(20)
      .block_height(12)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
}
3859
// 28x92 (height x width) block; input_stride 97, output_stride 34,
// input_element_stride 19 and output_element_stride 15.
TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_28_bw_92_ies_19_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(97)
      .output_stride(34)
      .block_width(92)
      .block_height(28)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
}
3873 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3874
3875
3876 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 4x4 block with input_stride 8 and output_stride 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
}
3888
// Every block_height/block_width pair in 1..8, with
// input_stride = 3 * width and output_stride = 7 * height.
TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 1; bh <= 8; ++bh) {
    for (size_t bw = 1; bw <= 8; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw * 3)
          .output_stride(bh * 7)
          .block_width(bw)
          .block_height(bh)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
    }
  }
}
3904
// Block of height 4 and width 8; input_stride 8, output_stride 4.
TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(8)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
}
3916
// block_height 4 with block_width swept over 5..7; input_stride equals
// the width and output_stride is 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(8)
        .block_width(bw)
        .block_height(4)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
  }
}
3930
// block_height 8 with block_width swept over 5..7; input_stride equals
// the width and output_stride is 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(8)
        .block_width(bw)
        .block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
  }
}
3944
// Block of height 8 and width 4; input_stride 4, output_stride 16.
TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(16)
      .block_width(4)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
}
3956
// block_height swept over 5..7 (also used as output_stride) with
// block_width fixed at 7 and input_stride 21.
// NOTE(review): the test name says bw_4 but block_width is 7 - presumably
// inherited from the generator template; confirm.
TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh < 8; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(21)
        .output_stride(bh)
        .block_width(7)
        .block_height(bh)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
  }
}
3970
// block_height swept over 5..7 (also used as output_stride);
// block_width and input_stride are both 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh < 8; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(8)
        .output_stride(bh)
        .block_width(8)
        .block_height(bh)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
  }
}
3984
// All block_height/block_width pairs in 5..7; each stride equals the
// matching block dimension.
TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh < 8; ++bh) {
    for (size_t bw = 5; bw < 8; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw)
          .output_stride(bh)
          .block_width(bw)
          .block_height(bh)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
    }
  }
}
4000
// 4x4 block with input_stride 8 and output_stride 4.
TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
}
4012
// 4x4 block with input_stride 4 and output_stride 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
}
4024
// 4x4 block with input_stride 8 and output_stride 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
}
4036
// 68x76 (height x width) block with input_element_stride 13.
TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_68_bw_76_ies_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(76)
      .output_stride(68)
      .block_width(76)
      .block_height(68)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
}
4049
// 12x20 (height x width) block with output_element_stride 13.
TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_12_bw_20_oes_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(20)
      .output_stride(12)
      .block_width(20)
      .block_height(12)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
}
4062
// 28x92 (height x width) block; input_stride 97, output_stride 34,
// input_element_stride 19 and output_element_stride 15.
TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_28_bw_92_ies_19_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(97)
      .output_stride(34)
      .block_width(92)
      .block_height(28)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
}
4076 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4077
4078
4079 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 4x4 block, element size 2; both strides are 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
}
4091
// Sweeps every block height and width from 1 through 8 inclusive, with input
// stride 3 * width and output stride 7 * height.
TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(2).iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
    }
  }
}
4107
// Block 8 wide by 4 high; strides match the block (input 8, output 4).
TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(8).output_stride(4)
      .block_width(8).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
}
4119
// Height fixed at 4; widths 5, 6 and 7 (the loop bound excludes 8). The input
// stride equals the width and the output stride is 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width).output_stride(8)
        .block_width(width).block_height(4)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
  }
}
4133
// Height fixed at 8; widths 5, 6 and 7 (the loop bound excludes 8). The input
// stride equals the width and the output stride is 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width).output_stride(8)
        .block_width(width).block_height(8)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
  }
}
4147
// Block 4 wide by 8 high; input stride 4, output stride 16.
TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(4).output_stride(16)
      .block_width(4).block_height(8)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
}
4159
// Heights 5, 6 and 7 (the loop bound excludes 8) with output stride equal to
// the height; width is fixed at 7 and input stride at 21.
// NOTE(review): the test name says bw_4 but block_width is 7 -- confirm this
// is what the generator spec intends.
TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(21).output_stride(height)
        .block_width(7).block_height(height)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
  }
}
4173
// Heights 5, 6 and 7 (the loop bound excludes 8) with output stride equal to
// the height; width and input stride are both 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(8).output_stride(height)
        .block_width(8).block_height(height)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
  }
}
4187
// Every height/width pair drawn from 5, 6 and 7 (both loop bounds exclude 8);
// input stride equals the width and output stride equals the height.
TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(2).iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
    }
  }
}
4203
// Single 4x4 block, element size 2; input stride 8, output stride 4.
TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(8).output_stride(4)
      .block_width(4).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
}
4215
// Single 4x4 block, element size 2; input stride 4, output stride 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(4).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
}
4227
// Single 4x4 block, element size 2; input stride 8, output stride 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
}
4239
// Block 76 wide by 68 high with strides equal to the block dimensions and an
// input element stride of 13.
TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_68_bw_76_ies_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(76).output_stride(68)
      .block_width(76).block_height(68)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
}
4252
// Block 20 wide by 12 high with strides equal to the block dimensions and an
// output element stride of 13.
TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_12_bw_20_oes_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(20).output_stride(12)
      .block_width(20).block_height(12)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
}
4265
// Block 92 wide by 28 high; input stride 97, output stride 34, input element
// stride 19 and output element stride 15.
TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_28_bw_92_ies_19_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(97).output_stride(34)
      .block_width(92).block_height(28)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
}
4279 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4280
4281
4282 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 4x4 block, element size 2; both strides are 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
4294
// Sweeps every block height and width from 1 through 8 inclusive, with input
// stride 3 * width and output stride 7 * height.
TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(2).iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon);
    }
  }
}
4310
// Block 8 wide by 4 high; strides match the block (input 8, output 4).
TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(8).output_stride(4)
      .block_width(8).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
4322
// Height fixed at 4; widths 5, 6 and 7 (the loop bound excludes 8). The input
// stride equals the width and the output stride is 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width).output_stride(8)
        .block_width(width).block_height(4)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon);
  }
}
4336
// Height fixed at 8; widths 5, 6 and 7 (the loop bound excludes 8). The input
// stride equals the width and the output stride is 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width).output_stride(8)
        .block_width(width).block_height(8)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon);
  }
}
4350
// Block 4 wide by 8 high; input stride 4, output stride 16.
TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(4).output_stride(16)
      .block_width(4).block_height(8)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
4362
// Heights 5, 6 and 7 (the loop bound excludes 8) with output stride equal to
// the height; width is fixed at 7 and input stride at 21.
// NOTE(review): the test name says bw_4 but block_width is 7 -- confirm this
// is what the generator spec intends.
TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(21).output_stride(height)
        .block_width(7).block_height(height)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon);
  }
}
4376
// Heights 5, 6 and 7 (the loop bound excludes 8) with output stride equal to
// the height; width and input stride are both 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(8).output_stride(height)
        .block_width(8).block_height(height)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon);
  }
}
4390
// Every height/width pair drawn from 5, 6 and 7 (both loop bounds exclude 8);
// input stride equals the width and output stride equals the height.
TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(2).iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon);
    }
  }
}
4406
// Single 4x4 block, element size 2; input stride 8, output stride 4.
TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(8).output_stride(4)
      .block_width(4).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
4418
// Single 4x4 block, element size 2; input stride 4, output stride 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(4).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
4430
// Single 4x4 block, element size 2; input stride 8, output stride 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
4442
// Block 76 wide by 68 high with strides equal to the block dimensions and an
// input element stride of 13.
TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_68_bw_76_ies_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(76).output_stride(68)
      .block_width(76).block_height(68)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
4455
// Block 20 wide by 12 high with strides equal to the block dimensions and an
// output element stride of 13.
TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_12_bw_20_oes_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(20).output_stride(12)
      .block_width(20).block_height(12)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
4468
// Block 92 wide by 28 high; input stride 97, output stride 34, input element
// stride 19 and output element stride 15.
TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_28_bw_92_ies_19_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(97).output_stride(34)
      .block_width(92).block_height(28)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
4482 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4483
4484
4485 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 4x4 block, element size 2; both strides are 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon);
}
4497
// Sweeps every block height and width from 1 through 8 inclusive, with input
// stride 3 * width and output stride 7 * height.
TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(2).iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon);
    }
  }
}
4513
// Block 8 wide by 4 high; strides match the block (input 8, output 4).
TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(8).output_stride(4)
      .block_width(8).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon);
}
4525
// Height fixed at 4; widths 5, 6 and 7 (the loop bound excludes 8). The input
// stride equals the width and the output stride is 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width).output_stride(8)
        .block_width(width).block_height(4)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon);
  }
}
4539
// Height fixed at 8; widths 5, 6 and 7 (the loop bound excludes 8). The input
// stride equals the width and the output stride is 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width).output_stride(8)
        .block_width(width).block_height(8)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon);
  }
}
4553
// Block 4 wide by 8 high; input stride 4, output stride 16.
TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(4).output_stride(16)
      .block_width(4).block_height(8)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon);
}
4565
// Heights 5, 6 and 7 (the loop bound excludes 8) with output stride equal to
// the height; width is fixed at 7 and input stride at 21.
// NOTE(review): the test name says bw_4 but block_width is 7 -- confirm this
// is what the generator spec intends.
TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(21).output_stride(height)
        .block_width(7).block_height(height)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon);
  }
}
4579
// Heights 5, 6 and 7 (the loop bound excludes 8) with output stride equal to
// the height; width and input stride are both 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(8).output_stride(height)
        .block_width(8).block_height(height)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon);
  }
}
4593
// Every height/width pair drawn from 5, 6 and 7 (both loop bounds exclude 8);
// input stride equals the width and output stride equals the height.
TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(2).iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon);
    }
  }
}
4609
// Single 4x4 block, element size 2; input stride 8, output stride 4.
TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(8).output_stride(4)
      .block_width(4).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon);
}
4621
// Single 4x4 block, element size 2; input stride 4, output stride 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(4).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon);
}
4633
// Single 4x4 block, element size 2; input stride 8, output stride 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon);
}
4645
// Block 76 wide by 68 high with strides equal to the block dimensions and an
// input element stride of 13.
TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_68_bw_76_ies_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(76).output_stride(68)
      .block_width(76).block_height(68)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon);
}
4658
// Block 20 wide by 12 high with strides equal to the block dimensions and an
// output element stride of 13.
TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_12_bw_20_oes_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(20).output_stride(12)
      .block_width(20).block_height(12)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon);
}
4671
// Block 92 wide by 28 high; input stride 97, output stride 34, input element
// stride 19 and output element stride 15.
TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_28_bw_92_ies_19_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(97).output_stride(34)
      .block_width(92).block_height(28)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon);
}
4685 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4686
4687
4688 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 4x4 block, element size 2; both strides are 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon);
}
4700
// Sweeps every block height and width from 1 through 8 inclusive, with input
// stride 3 * width and output stride 7 * height.
TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(2).iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon);
    }
  }
}
4716
// Block 8 wide by 4 high; strides match the block (input 8, output 4).
TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(8).output_stride(4)
      .block_width(8).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon);
}
4728
// Height fixed at 4; widths 5, 6 and 7 (the loop bound excludes 8). The input
// stride equals the width and the output stride is 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width).output_stride(8)
        .block_width(width).block_height(4)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon);
  }
}
4742
// Height fixed at 8; widths 5, 6 and 7 (the loop bound excludes 8). The input
// stride equals the width and the output stride is 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width).output_stride(8)
        .block_width(width).block_height(8)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon);
  }
}
4756
// Block 4 wide by 8 high; input stride 4, output stride 16.
TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(4).output_stride(16)
      .block_width(4).block_height(8)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon);
}
4768
// Heights 5, 6 and 7 (the loop bound excludes 8) with output stride equal to
// the height; width is fixed at 7 and input stride at 21.
// NOTE(review): the test name says bw_4 but block_width is 7 -- confirm this
// is what the generator spec intends.
TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(21).output_stride(height)
        .block_width(7).block_height(height)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon);
  }
}
4782
// Heights 5, 6 and 7 (the loop bound excludes 8) with output stride equal to
// the height; width and input stride are both 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(8).output_stride(height)
        .block_width(8).block_height(height)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon);
  }
}
4796
// Every height/width pair drawn from 5, 6 and 7 (both loop bounds exclude 8);
// input stride equals the width and output stride equals the height.
TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(2).iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon);
    }
  }
}
4812
// Single 4x4 block, element size 2; input stride 8, output stride 4.
TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(8).output_stride(4)
      .block_width(4).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon);
}
4824
// Single 4x4 block, element size 2; input stride 4, output stride 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(4).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon);
}
4836
// Single 4x4 block, element size 2; input stride 8, output stride 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon);
}
4848
// Block 76 wide by 68 high with strides equal to the block dimensions and an
// input element stride of 13.
TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_68_bw_76_ies_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(76).output_stride(68)
      .block_width(76).block_height(68)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon);
}
4861
// Block 20 wide by 12 high with strides equal to the block dimensions and an
// output element stride of 13.
TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_12_bw_20_oes_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(20).output_stride(12)
      .block_width(20).block_height(12)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon);
}
4874
// Block 92 wide by 28 high; input stride 97, output stride 34, input element
// stride 19 and output element stride 15.
TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_28_bw_92_ies_19_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(97).output_stride(34)
      .block_width(92).block_height(28)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon);
}
4888 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4889
4890
4891 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 4x4 block, element size 2; both strides are 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon);
}
4903
// Sweeps every block height and width from 1 through 8 inclusive, with input
// stride 3 * width and output stride 7 * height.
TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(2).iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon);
    }
  }
}
4919
// Block 8 wide by 4 high; strides match the block (input 8, output 4).
TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(8).output_stride(4)
      .block_width(8).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon);
}
4931
// Height fixed at 4; widths 5, 6 and 7 (the loop bound excludes 8). The input
// stride equals the width and the output stride is 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width).output_stride(8)
        .block_width(width).block_height(4)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon);
  }
}
4945
// Height fixed at 8; widths 5, 6 and 7 (the loop bound excludes 8). The input
// stride equals the width and the output stride is 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width).output_stride(8)
        .block_width(width).block_height(8)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon);
  }
}
4959
// Block 4 wide by 8 high; input stride 4, output stride 16.
TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(4).output_stride(16)
      .block_width(4).block_height(8)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon);
}
4971
// Heights 5, 6 and 7 (the loop bound excludes 8) with output stride equal to
// the height; width is fixed at 7 and input stride at 21.
// NOTE(review): the test name says bw_4 but block_width is 7 -- confirm this
// is what the generator spec intends.
TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(21).output_stride(height)
        .block_width(7).block_height(height)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon);
  }
}
4985
// Heights 5, 6 and 7 (the loop bound excludes 8) with output stride equal to
// the height; width and input stride are both 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(8).output_stride(height)
        .block_width(8).block_height(height)
        .element_size(2).iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon);
  }
}
4999
// Every height/width pair drawn from 5, 6 and 7 (both loop bounds exclude 8);
// input stride equals the width and output stride equals the height.
TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(2).iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon);
    }
  }
}
5015
// Single 4x4 block, element size 2; input stride 8, output stride 4.
TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(8).output_stride(4)
      .block_width(4).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon);
}
5027
// Single 4x4 block, element size 2; input stride 4, output stride 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(4).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon);
}
5039
// Single 4x4 block, element size 2; input stride 8, output stride 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester{};
  tester.input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(2).iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon);
}
5051
// Block of height 68 and width 76 with input_element_stride set to 13.
TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_68_bw_76_ies_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(76)
      .output_stride(68)
      .block_width(76)
      .block_height(68)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon);
}
5064
// Block of height 12 and width 20 with output_element_stride set to 13.
TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_12_bw_20_oes_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(20)
      .output_stride(12)
      .block_width(20)
      .block_height(12)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon);
}
5077
// Block of height 28 and width 92 with input_element_stride 19 and
// output_element_stride 15; both strides (97, 34) exceed the block dimensions.
TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_28_bw_92_ies_19_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(97)
      .output_stride(34)
      .block_width(92)
      .block_height(28)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon);
}
5091 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5092
5093
5094 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// One 8x8 block with input_stride and output_stride both set to 16.
// NOTE(review): same settings as the bh_8_bw_8_is_16_os_16 case of this suite.
TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon);
}
5106
// Sweep of block heights 1..16 and block widths 1..16 (both inclusive), with
// input_stride = 3 * width and output_stride = 7 * height.
TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 16; height++) {
    for (size_t width = 1; width <= 16; width++) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon);
    }
  }
}
5122
// Block height 8 with block width 16; strides equal the block dimensions.
TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(16)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon);
}
5134
// Block height 8 with block widths 9..15; input_stride tracks the width and
// output_stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(16)
        .block_width(width)
        .block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon);
  }
}
5148
// Block height 16 with block widths 9..15; input_stride tracks the width and
// output_stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(16)
        .block_width(width)
        .block_height(16)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon);
  }
}
5162
// Block height 16 with block width 8; output_stride is 28.
TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(28)
      .block_width(8)
      .block_height(16)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon);
}
5174
// Block heights 9..15; output_stride tracks the height.
// NOTE(review): the case name says bw_8, but block_width is 11 and
// input_stride is 25 -- confirm against the generator spec.
TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; height++) {
    TransposeMicrokernelTester()
        .input_stride(25)
        .output_stride(height)
        .block_width(11)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon);
  }
}
5188
// Block heights 9..15 at block width 16; output_stride tracks the height.
TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; height++) {
    TransposeMicrokernelTester()
        .input_stride(16)
        .output_stride(height)
        .block_width(16)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon);
  }
}
5202
// Block heights 9..15 crossed with block widths 9..15; each stride equals the
// matching block dimension.
TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; height++) {
    for (size_t width = 9; width < 16; width++) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon);
    }
  }
}
5218
// One 8x8 block with input_stride 16 and output_stride 8.
TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon);
}
5230
// One 8x8 block with input_stride 8 and output_stride 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon);
}
5242
// One 8x8 block with input_stride and output_stride both set to 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon);
}
5254
// Block of height 136 and width 152 with input_element_stride set to 13.
TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_136_bw_152_ies_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(152)
      .output_stride(136)
      .block_width(152)
      .block_height(136)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon);
}
5267
// Block of height 24 and width 40 with output_element_stride set to 13.
TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_24_bw_40_oes_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(40)
      .output_stride(24)
      .block_width(40)
      .block_height(24)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon);
}
5280
// Block of height 56 and width 184 with input_element_stride 19 and
// output_element_stride 15; both strides (189, 62) exceed the block dimensions.
TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_56_bw_184_ies_19_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(189)
      .output_stride(62)
      .block_width(184)
      .block_height(56)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon);
}
5294 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5295
5296
5297 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// One 8x8 block with input_stride and output_stride both set to 16.
// NOTE(review): same settings as the bh_8_bw_8_is_16_os_16 case of this suite.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
}
5309
// Sweep of block heights 1..16 and block widths 1..16 (both inclusive), with
// input_stride = 3 * width and output_stride = 7 * height.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 16; height++) {
    for (size_t width = 1; width <= 16; width++) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
    }
  }
}
5325
// Block height 8 with block width 16; strides equal the block dimensions.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(16)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
}
5337
// Block height 8 with block widths 9..15; input_stride tracks the width and
// output_stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(16)
        .block_width(width)
        .block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
  }
}
5351
// Block height 16 with block widths 9..15; input_stride tracks the width and
// output_stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(16)
        .block_width(width)
        .block_height(16)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
  }
}
5365
// Block height 16 with block width 8; output_stride is 28.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(28)
      .block_width(8)
      .block_height(16)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
}
5377
// Block heights 9..15; output_stride tracks the height.
// NOTE(review): the case name says bw_8, but block_width is 11 and
// input_stride is 25 -- confirm against the generator spec.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; height++) {
    TransposeMicrokernelTester()
        .input_stride(25)
        .output_stride(height)
        .block_width(11)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
  }
}
5391
// Block heights 9..15 at block width 16; output_stride tracks the height.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; height++) {
    TransposeMicrokernelTester()
        .input_stride(16)
        .output_stride(height)
        .block_width(16)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
  }
}
5405
// Block heights 9..15 crossed with block widths 9..15; each stride equals the
// matching block dimension.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; height++) {
    for (size_t width = 9; width < 16; width++) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
    }
  }
}
5421
// One 8x8 block with input_stride 16 and output_stride 8.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
}
5433
// One 8x8 block with input_stride 8 and output_stride 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
}
5445
// One 8x8 block with input_stride and output_stride both set to 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
}
5457
// Block of height 136 and width 152 with input_element_stride set to 13.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_136_bw_152_ies_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(152)
      .output_stride(136)
      .block_width(152)
      .block_height(136)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
}
5470
// Block of height 24 and width 40 with output_element_stride set to 13.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_24_bw_40_oes_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(40)
      .output_stride(24)
      .block_width(40)
      .block_height(24)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
}
5483
// Block of height 56 and width 184 with input_element_stride 19 and
// output_element_stride 15; both strides (189, 62) exceed the block dimensions.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_56_bw_184_ies_19_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(189)
      .output_stride(62)
      .block_width(184)
      .block_height(56)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
}
5497 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5498
5499
5500 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// One 8x8 block with input_stride and output_stride both set to 16.
// NOTE(review): same settings as the bh_8_bw_8_is_16_os_16 case of this suite.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
}
5512
// Sweep of block heights 1..16 and block widths 1..16 (both inclusive), with
// input_stride = 3 * width and output_stride = 7 * height.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 16; height++) {
    for (size_t width = 1; width <= 16; width++) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
    }
  }
}
5528
// Block height 8 with block width 16; strides equal the block dimensions.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(16)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
}
5540
// Block height 8 with block widths 9..15; input_stride tracks the width and
// output_stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(16)
        .block_width(width)
        .block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
  }
}
5554
// Block height 16 with block widths 9..15; input_stride tracks the width and
// output_stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(16)
        .block_width(width)
        .block_height(16)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
  }
}
5568
// Block height 16 with block width 8; output_stride is 28.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(28)
      .block_width(8)
      .block_height(16)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
}
5580
// Block heights 9..15; output_stride tracks the height.
// NOTE(review): the case name says bw_8, but block_width is 11 and
// input_stride is 25 -- confirm against the generator spec.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; height++) {
    TransposeMicrokernelTester()
        .input_stride(25)
        .output_stride(height)
        .block_width(11)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
  }
}
5594
// Block heights 9..15 at block width 16; output_stride tracks the height.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; height++) {
    TransposeMicrokernelTester()
        .input_stride(16)
        .output_stride(height)
        .block_width(16)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
  }
}
5608
// Block heights 9..15 crossed with block widths 9..15; each stride equals the
// matching block dimension.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; height++) {
    for (size_t width = 9; width < 16; width++) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
    }
  }
}
5624
// One 8x8 block with input_stride 16 and output_stride 8.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
}
5636
// One 8x8 block with input_stride 8 and output_stride 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
}
5648
// One 8x8 block with input_stride and output_stride both set to 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
}
5660
// Block of height 136 and width 152 with input_element_stride set to 13.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_136_bw_152_ies_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(152)
      .output_stride(136)
      .block_width(152)
      .block_height(136)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
}
5673
// Block of height 24 and width 40 with output_element_stride set to 13.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_24_bw_40_oes_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(40)
      .output_stride(24)
      .block_width(40)
      .block_height(24)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
}
5686
// Block of height 56 and width 184 with input_element_stride 19 and
// output_element_stride 15; both strides (189, 62) exceed the block dimensions.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_56_bw_184_ies_19_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(189)
      .output_stride(62)
      .block_width(184)
      .block_height(56)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
}
5700 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5701
5702
5703 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// One 8x8 block with input_stride and output_stride both set to 16.
// NOTE(review): same settings as the bh_8_bw_8_is_16_os_16 case of this suite.
TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
5715
// Sweep of block heights 1..16 and block widths 1..16 (both inclusive), with
// input_stride = 3 * width and output_stride = 7 * height.
TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 16; height++) {
    for (size_t width = 1; width <= 16; width++) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
    }
  }
}
5731
// Block height 8 with block width 16; strides equal the block dimensions.
TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(16)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
5743
// Block height 8 with block widths 9..15; input_stride tracks the width and
// output_stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(16)
        .block_width(width)
        .block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
  }
}
5757
// Block height 16 with block widths 9..15; input_stride tracks the width and
// output_stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(16)
        .block_width(width)
        .block_height(16)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
  }
}
5771
// Block height 16 with block width 8; output_stride is 28.
TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(28)
      .block_width(8)
      .block_height(16)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
5783
// Block heights 9..15; output_stride tracks the height.
// NOTE(review): the case name says bw_8, but block_width is 11 and
// input_stride is 25 -- confirm against the generator spec.
TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; height++) {
    TransposeMicrokernelTester()
        .input_stride(25)
        .output_stride(height)
        .block_width(11)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
  }
}
5797
// Block heights 9..15 at block width 16; output_stride tracks the height.
TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; height++) {
    TransposeMicrokernelTester()
        .input_stride(16)
        .output_stride(height)
        .block_width(16)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
  }
}
5811
// Block heights 9..15 crossed with block widths 9..15; each stride equals the
// matching block dimension.
TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; height++) {
    for (size_t width = 9; width < 16; width++) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
    }
  }
}
5827
// One 8x8 block with input_stride 16 and output_stride 8.
TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
5839
// One 8x8 block with input_stride 8 and output_stride 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
5851
// One 8x8 block with input_stride and output_stride both set to 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
5863
// Block of height 136 and width 152 with input_element_stride set to 13.
TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_136_bw_152_ies_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(152)
      .output_stride(136)
      .block_width(152)
      .block_height(136)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
5876
// Block of height 24 and width 40 with output_element_stride set to 13.
TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_24_bw_40_oes_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(40)
      .output_stride(24)
      .block_width(40)
      .block_height(24)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
5889
// Block of height 56 and width 184 with input_element_stride 19 and
// output_element_stride 15; both strides (189, 62) exceed the block dimensions.
TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_56_bw_184_ies_19_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(189)
      .output_stride(62)
      .block_width(184)
      .block_height(56)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
5903 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5904
5905
5906 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// One 8x8 block with input_stride and output_stride both set to 16.
// NOTE(review): same settings as the bh_8_bw_8_is_16_os_16 case of this suite.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
5918
// Sweep of block heights 1..16 and block widths 1..16 (both inclusive), with
// input_stride = 3 * width and output_stride = 7 * height.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 16; height++) {
    for (size_t width = 1; width <= 16; width++) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
    }
  }
}
5934
// Block height 8 with block width 16; strides equal the block dimensions.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(16)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
5946
// Block height 8 with block widths 9..15; input_stride tracks the width and
// output_stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(16)
        .block_width(width)
        .block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
  }
}
5960
// Block height 16 with block widths 9..15; input_stride tracks the width and
// output_stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(16)
        .block_width(width)
        .block_height(16)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
  }
}
5974
// Block height 16 with block width 8; output_stride is 28.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(28)
      .block_width(8)
      .block_height(16)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
5986
// Block heights 9..15; output_stride tracks the height.
// NOTE(review): the case name says bw_8, but block_width is 11 and
// input_stride is 25 -- confirm against the generator spec.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; height++) {
    TransposeMicrokernelTester()
        .input_stride(25)
        .output_stride(height)
        .block_width(11)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
  }
}
6000
// Block heights 9..15 at block width 16; output_stride tracks the height.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; height++) {
    TransposeMicrokernelTester()
        .input_stride(16)
        .output_stride(height)
        .block_width(16)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
  }
}
6014
// Block heights 9..15 crossed with block widths 9..15; each stride equals the
// matching block dimension.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; height++) {
    for (size_t width = 9; width < 16; width++) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
    }
  }
}
6030
// Single 8x8 block where only the input stride (16) exceeds the block
// width; the output stride equals the block height (8).
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester = TransposeMicrokernelTester();
  tester.input_stride(16)
      .output_stride(8)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
6042
// Single 8x8 block where only the output stride (16) exceeds the block
// height; the input stride equals the block width (8).
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester = TransposeMicrokernelTester();
  tester.input_stride(8)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
6054
// Single 8x8 block with both the input and the output stride set to 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester = TransposeMicrokernelTester();
  tester.input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
6066
// Large block (height 136, width 152) with tight strides and an input
// element stride of 13; the output element stride is left at its default.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_136_bw_152_ies_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester = TransposeMicrokernelTester();
  tester.input_stride(152)
      .output_stride(136)
      .block_width(152)
      .block_height(136)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
6079
// Block of height 24 and width 40 with tight strides and an output
// element stride of 13; the input element stride is left at its default.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_24_bw_40_oes_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester = TransposeMicrokernelTester();
  tester.input_stride(40)
      .output_stride(24)
      .block_width(40)
      .block_height(24)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
6092
// Block of height 56 and width 184 with padded strides (input 189,
// output 62) and both element strides overridden (input 19, output 15).
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_56_bw_184_ies_19_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester = TransposeMicrokernelTester();
  tester.input_stride(189)
      .output_stride(62)
      .block_width(184)
      .block_height(56)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
6106 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6107
6108
6109 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 8x8 block with input and output strides of 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester = TransposeMicrokernelTester();
  tester.input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
6121
// Sweeps every (height, width) pair with both values in 1 through 16
// inclusive. Strides are padded: input stride is 3x the width and output
// stride is 7x the height.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 1; bh <= 16; ++bh) {
    for (size_t bw = 1; bw <= 16; ++bw) {
      TransposeMicrokernelTester tester = TransposeMicrokernelTester();
      tester.input_stride(bw * 3)
          .output_stride(bh * 7)
          .block_width(bw)
          .block_height(bh)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
    }
  }
}
6137
// Single case: block height 8, block width 16, with tight strides
// (input stride 16, output stride 8).
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester = TransposeMicrokernelTester();
  tester.input_stride(16)
      .output_stride(8)
      .block_width(16)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
6149
// Sweeps block widths 9 through 15 with the block height fixed at 8.
// The input stride follows the block width; the output stride stays at 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester tester = TransposeMicrokernelTester();
    tester.input_stride(bw)
        .output_stride(16)
        .block_width(bw)
        .block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
  }
}
6163
// Sweeps block widths 9 through 15 with the block height fixed at 16.
// The input stride follows the block width; the output stride stays at 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester tester = TransposeMicrokernelTester();
    tester.input_stride(bw)
        .output_stride(16)
        .block_width(bw)
        .block_height(16)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
  }
}
6177
// Single case: block height 16, block width 8, input stride 8 and an
// output stride of 28 (larger than the block height).
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester = TransposeMicrokernelTester();
  tester.input_stride(8)
      .output_stride(28)
      .block_width(8)
      .block_height(16)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
6189
// Sweeps block heights 9 through 15; the output stride follows the height.
// NOTE(review): the test name says bw_8, but the block width set here is 11
// (with an input stride of 25) -- confirm this is what the generator intends.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester tester = TransposeMicrokernelTester();
    tester.input_stride(25)
        .output_stride(bh)
        .block_width(11)
        .block_height(bh)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
  }
}
6203
// Sweeps block heights 9 through 15 with the block width fixed at 16.
// The input stride is 16; the output stride follows the block height.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester tester = TransposeMicrokernelTester();
    tester.input_stride(16)
        .output_stride(bh)
        .block_width(16)
        .block_height(bh)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
  }
}
6217
// Sweeps every (height, width) pair with both values in 9 through 15.
// Strides are tight: input stride == width, output stride == height.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    for (size_t bw = 9; bw < 16; ++bw) {
      TransposeMicrokernelTester tester = TransposeMicrokernelTester();
      tester.input_stride(bw)
          .output_stride(bh)
          .block_width(bw)
          .block_height(bh)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
    }
  }
}
6233
// Single 8x8 block where only the input stride (16) exceeds the block
// width; the output stride equals the block height (8).
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester = TransposeMicrokernelTester();
  tester.input_stride(16)
      .output_stride(8)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
6245
// Single 8x8 block where only the output stride (16) exceeds the block
// height; the input stride equals the block width (8).
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester = TransposeMicrokernelTester();
  tester.input_stride(8)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
6257
// Single 8x8 block with both the input and the output stride set to 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester = TransposeMicrokernelTester();
  tester.input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
6269
// Large block (height 136, width 152) with tight strides and an input
// element stride of 13; the output element stride is left at its default.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_136_bw_152_ies_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester = TransposeMicrokernelTester();
  tester.input_stride(152)
      .output_stride(136)
      .block_width(152)
      .block_height(136)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
6282
// Block of height 24 and width 40 with tight strides and an output
// element stride of 13; the input element stride is left at its default.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_24_bw_40_oes_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester = TransposeMicrokernelTester();
  tester.input_stride(40)
      .output_stride(24)
      .block_width(40)
      .block_height(24)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
6295
// Block of height 56 and width 184 with padded strides (input 189,
// output 62) and both element strides overridden (input 19, output 15).
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_56_bw_184_ies_19_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester = TransposeMicrokernelTester();
  tester.input_stride(189)
      .output_stride(62)
      .block_width(184)
      .block_height(56)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
6309 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6310
6311
6312 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 8x8 block with input and output strides of 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester = TransposeMicrokernelTester();
  tester.input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
6324
// Sweeps every (height, width) pair with both values in 1 through 16
// inclusive. Strides are padded: input stride is 3x the width and output
// stride is 7x the height.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 1; bh <= 16; ++bh) {
    for (size_t bw = 1; bw <= 16; ++bw) {
      TransposeMicrokernelTester tester = TransposeMicrokernelTester();
      tester.input_stride(bw * 3)
          .output_stride(bh * 7)
          .block_width(bw)
          .block_height(bh)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
    }
  }
}
6340
// Single case: block height 8, block width 16, with tight strides
// (input stride 16, output stride 8).
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester = TransposeMicrokernelTester();
  tester.input_stride(16)
      .output_stride(8)
      .block_width(16)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
6352
// Sweeps block widths 9 through 15 with the block height fixed at 8.
// The input stride follows the block width; the output stride stays at 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester tester = TransposeMicrokernelTester();
    tester.input_stride(bw)
        .output_stride(16)
        .block_width(bw)
        .block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
  }
}
6366
// Sweeps block widths 9 through 15 with the block height fixed at 16.
// The input stride follows the block width; the output stride stays at 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester tester = TransposeMicrokernelTester();
    tester.input_stride(bw)
        .output_stride(16)
        .block_width(bw)
        .block_height(16)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
  }
}
6380
// Single case: block height 16, block width 8, input stride 8 and an
// output stride of 28 (larger than the block height).
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester = TransposeMicrokernelTester();
  tester.input_stride(8)
      .output_stride(28)
      .block_width(8)
      .block_height(16)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
6392
// Sweeps block heights 9 through 15; the output stride follows the height.
// NOTE(review): the test name says bw_8, but the block width set here is 11
// (with an input stride of 25) -- confirm this is what the generator intends.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester tester = TransposeMicrokernelTester();
    tester.input_stride(25)
        .output_stride(bh)
        .block_width(11)
        .block_height(bh)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
  }
}
6406
// Sweeps block heights 9 through 15 with the block width fixed at 16.
// The input stride is 16; the output stride follows the block height.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester tester = TransposeMicrokernelTester();
    tester.input_stride(16)
        .output_stride(bh)
        .block_width(16)
        .block_height(bh)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
  }
}
6420
// Sweeps every (height, width) pair with both values in 9 through 15.
// Strides are tight: input stride == width, output stride == height.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    for (size_t bw = 9; bw < 16; ++bw) {
      TransposeMicrokernelTester tester = TransposeMicrokernelTester();
      tester.input_stride(bw)
          .output_stride(bh)
          .block_width(bw)
          .block_height(bh)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
    }
  }
}
6436
// Single 8x8 block where only the input stride (16) exceeds the block
// width; the output stride equals the block height (8).
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester = TransposeMicrokernelTester();
  tester.input_stride(16)
      .output_stride(8)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
6448
// Single 8x8 block where only the output stride (16) exceeds the block
// height; the input stride equals the block width (8).
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester = TransposeMicrokernelTester();
  tester.input_stride(8)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
6460
// Single 8x8 block with both the input and the output stride set to 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester = TransposeMicrokernelTester();
  tester.input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
6472
// Large block (height 136, width 152) with tight strides and an input
// element stride of 13; the output element stride is left at its default.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_136_bw_152_ies_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester = TransposeMicrokernelTester();
  tester.input_stride(152)
      .output_stride(136)
      .block_width(152)
      .block_height(136)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
6485
// Block of height 24 and width 40 with tight strides and an output
// element stride of 13; the input element stride is left at its default.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_24_bw_40_oes_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester = TransposeMicrokernelTester();
  tester.input_stride(40)
      .output_stride(24)
      .block_width(40)
      .block_height(24)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
6498
// Block of height 56 and width 184 with padded strides (input 189,
// output 62) and both element strides overridden (input 19, output 15).
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_56_bw_184_ies_19_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester = TransposeMicrokernelTester();
  tester.input_stride(189)
      .output_stride(62)
      .block_width(184)
      .block_height(56)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
6512 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6513