1 // Copyright 2021 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 //
6 // Auto-generated file. Do not edit!
7 // Specification: test/x32-transpose.yaml
8 // Generator: tools/generate-transpose-test.py
9
10
11 #include <gtest/gtest.h>
12
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15
16 #include <xnnpack/transpose.h>
17 #include "transpose-microkernel-tester.h"
18
19
TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_1_bw_2) {
  // Single case: block_height 1, block_width 2, input_stride 4, output_stride 2.
  TransposeMicrokernelTester tester;
  tester.input_stride(4).output_stride(2);
  tester.block_width(2).block_height(1);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
}
30
TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_1_2_bw_1_4) {
  // Sweep block heights 1..2 and block widths 1..4 (both inclusive), with
  // input_stride = 3 * width and output_stride = 7 * height.
  for (size_t height = 1; height <= 2; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width * 3).output_stride(height * 7);
      tester.block_width(width).block_height(height);
      tester.element_size(4).iterations(1);
      tester.Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
    }
  }
}
45
TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_1_bw_4) {
  // Single case: block_height 1, block_width 4, input_stride 4, output_stride 1.
  TransposeMicrokernelTester tester;
  tester.input_stride(4).output_stride(1);
  tester.block_width(4).block_height(1);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
}
56
TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_1_bw_3_4) {
  // Widths in [3, 4) at block_height 1; input_stride equals the width.
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(2);
    tester.block_width(width).block_height(1);
    tester.element_size(4).iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
  }
}
69
TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_2_bw_3_4) {
  // Widths in [3, 4) at block_height 2; input_stride equals the width.
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(2);
    tester.block_width(width).block_height(2);
    tester.element_size(4).iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
  }
}
82
TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_2_bw_2) {
  // Single case: block_height 2, block_width 2, input_stride 2, output_stride 7.
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(7);
  tester.block_width(2).block_height(2);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
}
93
TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_2_2_bw_2) {
  // Block heights 2..2 (inclusive) with block_width 5 and input_stride 19;
  // output_stride equals the height. The bound used to be `i < 2`, which made
  // the loop empty and the test vacuous; it is inclusive so the case runs.
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester tester;
    tester.input_stride(19).output_stride(height);
    tester.block_width(5).block_height(height);
    tester.element_size(4).iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
  }
}
106
TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_2_2_bw_4) {
  // Block heights 2..2 (inclusive) with block_width 4 and input_stride 4;
  // output_stride equals the height. The bound used to be `i < 2`, which made
  // the loop empty and the test vacuous; it is inclusive so the case runs.
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester tester;
    tester.input_stride(4).output_stride(height);
    tester.block_width(4).block_height(height);
    tester.element_size(4).iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
  }
}
119
TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_2_2_bw_3_4) {
  // Block heights 2..2 (inclusive) crossed with widths in [3, 4); strides equal
  // the matching block dimension. The outer bound used to be `i < 2`, which
  // skipped the whole body; it is inclusive so the test exercises the kernel.
  for (size_t height = 2; height <= 2; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width).output_stride(height);
      tester.block_width(width).block_height(height);
      tester.element_size(4).iterations(1);
      tester.Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
    }
  }
}
134
TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_1_bw_2_is_4) {
  // input_stride 4 is larger than block_width 2; output_stride is 1.
  TransposeMicrokernelTester tester;
  tester.input_stride(4).output_stride(1);
  tester.block_width(2).block_height(1);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
}
145
TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_1_bw_2_os_2) {
  // output_stride 2 is larger than block_height 1; input_stride is 2.
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(2);
  tester.block_width(2).block_height(1);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
}
156
TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_1_bw_2_is_4_os_2) {
  // Both strides exceed the block: input_stride 4 and output_stride 2.
  TransposeMicrokernelTester tester;
  tester.input_stride(4).output_stride(2);
  tester.block_width(2).block_height(1);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
}
167
TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_17_bw_38_ies_15) {
  // 17 x 38 block with input_element_stride 15.
  TransposeMicrokernelTester tester;
  tester.input_stride(38).output_stride(17);
  tester.block_width(38).block_height(17);
  tester.element_size(4).input_element_stride(15).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
}
179
TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_3_bw_10_oes_15) {
  // 3 x 10 block with output_element_stride 15.
  TransposeMicrokernelTester tester;
  tester.input_stride(10).output_stride(3);
  tester.block_width(10).block_height(3);
  tester.element_size(4).output_element_stride(15).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
}
191
TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_7_bw_46_ies_21_oes_17) {
  // 7 x 46 block, strides 51/13, input/output element strides 21/17.
  TransposeMicrokernelTester tester;
  tester.input_stride(51).output_stride(13);
  tester.block_width(46).block_height(7);
  tester.element_size(4).input_element_stride(21).output_element_stride(17);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
}
204
TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_1_bw_2) {
  // Single case: block_height 1, block_width 2, input_stride 4, output_stride 2.
  TransposeMicrokernelTester tester;
  tester.input_stride(4).output_stride(2);
  tester.block_width(2).block_height(1);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
}
215
TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_1_2_bw_1_4) {
  // Sweep block heights 1..2 and block widths 1..4 (both inclusive), with
  // input_stride = 3 * width and output_stride = 7 * height.
  for (size_t height = 1; height <= 2; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width * 3).output_stride(height * 7);
      tester.block_width(width).block_height(height);
      tester.element_size(4).iterations(1);
      tester.Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
    }
  }
}
230
TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_1_bw_4) {
  // Single case: block_height 1, block_width 4, input_stride 4, output_stride 1.
  TransposeMicrokernelTester tester;
  tester.input_stride(4).output_stride(1);
  tester.block_width(4).block_height(1);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
}
241
TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_1_bw_3_4) {
  // Widths in [3, 4) at block_height 1; input_stride equals the width.
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(2);
    tester.block_width(width).block_height(1);
    tester.element_size(4).iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
  }
}
254
TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_2_bw_3_4) {
  // Widths in [3, 4) at block_height 2; input_stride equals the width.
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(2);
    tester.block_width(width).block_height(2);
    tester.element_size(4).iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
  }
}
267
TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_2_bw_2) {
  // Single case: block_height 2, block_width 2, input_stride 2, output_stride 7.
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(7);
  tester.block_width(2).block_height(2);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
}
278
TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_2_2_bw_2) {
  // Block heights 2..2 (inclusive) with block_width 5 and input_stride 19;
  // output_stride equals the height. The bound used to be `i < 2`, which made
  // the loop empty and the test vacuous; it is inclusive so the case runs.
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester tester;
    tester.input_stride(19).output_stride(height);
    tester.block_width(5).block_height(height);
    tester.element_size(4).iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
  }
}
291
TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_2_2_bw_4) {
  // Block heights 2..2 (inclusive) with block_width 4 and input_stride 4;
  // output_stride equals the height. The bound used to be `i < 2`, which made
  // the loop empty and the test vacuous; it is inclusive so the case runs.
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester tester;
    tester.input_stride(4).output_stride(height);
    tester.block_width(4).block_height(height);
    tester.element_size(4).iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
  }
}
304
TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_2_2_bw_3_4) {
  // Block heights 2..2 (inclusive) crossed with widths in [3, 4); strides equal
  // the matching block dimension. The outer bound used to be `i < 2`, which
  // skipped the whole body; it is inclusive so the test exercises the kernel.
  for (size_t height = 2; height <= 2; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width).output_stride(height);
      tester.block_width(width).block_height(height);
      tester.element_size(4).iterations(1);
      tester.Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
    }
  }
}
319
TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_1_bw_2_is_4) {
  // input_stride 4 is larger than block_width 2; output_stride is 1.
  TransposeMicrokernelTester tester;
  tester.input_stride(4).output_stride(1);
  tester.block_width(2).block_height(1);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
}
330
TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_1_bw_2_os_2) {
  // output_stride 2 is larger than block_height 1; input_stride is 2.
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(2);
  tester.block_width(2).block_height(1);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
}
341
TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_1_bw_2_is_4_os_2) {
  // Both strides exceed the block: input_stride 4 and output_stride 2.
  TransposeMicrokernelTester tester;
  tester.input_stride(4).output_stride(2);
  tester.block_width(2).block_height(1);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
}
352
TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_17_bw_38_ies_15) {
  // 17 x 38 block with input_element_stride 15.
  TransposeMicrokernelTester tester;
  tester.input_stride(38).output_stride(17);
  tester.block_width(38).block_height(17);
  tester.element_size(4).input_element_stride(15).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
}
364
TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_3_bw_10_oes_15) {
  // 3 x 10 block with output_element_stride 15.
  TransposeMicrokernelTester tester;
  tester.input_stride(10).output_stride(3);
  tester.block_width(10).block_height(3);
  tester.element_size(4).output_element_stride(15).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
}
376
TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_7_bw_46_ies_21_oes_17) {
  // 7 x 46 block, strides 51/13, input/output element strides 21/17.
  TransposeMicrokernelTester tester;
  tester.input_stride(51).output_stride(13);
  tester.block_width(46).block_height(7);
  tester.element_size(4).input_element_stride(21).output_element_stride(17);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
}
389
TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_1_bw_4) {
  // Single case: block_height 1, block_width 4, input_stride 8, output_stride 2.
  TransposeMicrokernelTester tester;
  tester.input_stride(8).output_stride(2);
  tester.block_width(4).block_height(1);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
}
400
TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_1_2_bw_1_8) {
  // Sweep block heights 1..2 and block widths 1..8 (both inclusive), with
  // input_stride = 3 * width and output_stride = 7 * height.
  for (size_t height = 1; height <= 2; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width * 3).output_stride(height * 7);
      tester.block_width(width).block_height(height);
      tester.element_size(4).iterations(1);
      tester.Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
    }
  }
}
415
TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_1_bw_8) {
  // Single case: block_height 1, block_width 8, input_stride 8, output_stride 1.
  TransposeMicrokernelTester tester;
  tester.input_stride(8).output_stride(1);
  tester.block_width(8).block_height(1);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
}
426
TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_1_bw_5_8) {
  // Widths in [5, 8) at block_height 1; input_stride equals the width.
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(2);
    tester.block_width(width).block_height(1);
    tester.element_size(4).iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
  }
}
439
TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_2_bw_5_8) {
  // Widths in [5, 8) at block_height 2; input_stride equals the width.
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(2);
    tester.block_width(width).block_height(2);
    tester.element_size(4).iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
  }
}
452
TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_2_bw_4) {
  // Single case: block_height 2, block_width 4, input_stride 4, output_stride 7.
  TransposeMicrokernelTester tester;
  tester.input_stride(4).output_stride(7);
  tester.block_width(4).block_height(2);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
}
463
TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_2_2_bw_4) {
  // Block heights 2..2 (inclusive) with block_width 7 and input_stride 21;
  // output_stride equals the height. The bound used to be `i < 2`, which made
  // the loop empty and the test vacuous; it is inclusive so the case runs.
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester tester;
    tester.input_stride(21).output_stride(height);
    tester.block_width(7).block_height(height);
    tester.element_size(4).iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
  }
}
476
TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_2_2_bw_8) {
  // Block heights 2..2 (inclusive) with block_width 8 and input_stride 8;
  // output_stride equals the height. The bound used to be `i < 2`, which made
  // the loop empty and the test vacuous; it is inclusive so the case runs.
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester tester;
    tester.input_stride(8).output_stride(height);
    tester.block_width(8).block_height(height);
    tester.element_size(4).iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
  }
}
489
TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_2_2_bw_5_8) {
  // Block heights 2..2 (inclusive) crossed with widths in [5, 8); strides equal
  // the matching block dimension. The outer bound used to be `i < 2`, which
  // skipped the whole body; it is inclusive so the test exercises the kernel.
  for (size_t height = 2; height <= 2; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width).output_stride(height);
      tester.block_width(width).block_height(height);
      tester.element_size(4).iterations(1);
      tester.Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
    }
  }
}
504
TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_1_bw_4_is_8) {
  // input_stride 8 is larger than block_width 4; output_stride is 1.
  TransposeMicrokernelTester tester;
  tester.input_stride(8).output_stride(1);
  tester.block_width(4).block_height(1);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
}
515
TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_1_bw_4_os_2) {
  // output_stride 2 is larger than block_height 1; input_stride is 4.
  TransposeMicrokernelTester tester;
  tester.input_stride(4).output_stride(2);
  tester.block_width(4).block_height(1);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
}
526
TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_1_bw_4_is_8_os_2) {
  // Both strides exceed the block: input_stride 8 and output_stride 2.
  TransposeMicrokernelTester tester;
  tester.input_stride(8).output_stride(2);
  tester.block_width(4).block_height(1);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
}
537
TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_17_bw_76_ies_15) {
  // 17 x 76 block with input_element_stride 15.
  TransposeMicrokernelTester tester;
  tester.input_stride(76).output_stride(17);
  tester.block_width(76).block_height(17);
  tester.element_size(4).input_element_stride(15).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
}
549
TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_3_bw_20_oes_15) {
  // 3 x 20 block with output_element_stride 15.
  TransposeMicrokernelTester tester;
  tester.input_stride(20).output_stride(3);
  tester.block_width(20).block_height(3);
  tester.element_size(4).output_element_stride(15).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
}
561
TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_7_bw_92_ies_21_oes_17) {
  // 7 x 92 block, strides 97/13, input/output element strides 21/17.
  TransposeMicrokernelTester tester;
  tester.input_stride(97).output_stride(13);
  tester.block_width(92).block_height(7);
  tester.element_size(4).input_element_stride(21).output_element_stride(17);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
}
574
TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_1_bw_4) {
  // Single case: block_height 1, block_width 4, input_stride 8, output_stride 2.
  TransposeMicrokernelTester tester;
  tester.input_stride(8).output_stride(2);
  tester.block_width(4).block_height(1);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
}
585
TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_1_2_bw_1_8) {
  // Sweep block heights 1..2 and block widths 1..8 (both inclusive), with
  // input_stride = 3 * width and output_stride = 7 * height.
  for (size_t height = 1; height <= 2; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width * 3).output_stride(height * 7);
      tester.block_width(width).block_height(height);
      tester.element_size(4).iterations(1);
      tester.Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
    }
  }
}
600
TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_1_bw_8) {
  // Single case: block_height 1, block_width 8, input_stride 8, output_stride 1.
  TransposeMicrokernelTester tester;
  tester.input_stride(8).output_stride(1);
  tester.block_width(8).block_height(1);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
}
611
TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_1_bw_5_8) {
  // Widths in [5, 8) at block_height 1; input_stride equals the width.
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(2);
    tester.block_width(width).block_height(1);
    tester.element_size(4).iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
  }
}
624
TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_2_bw_5_8) {
  // Widths in [5, 8) at block_height 2; input_stride equals the width.
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(2);
    tester.block_width(width).block_height(2);
    tester.element_size(4).iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
  }
}
637
TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_2_bw_4) {
  // Single case: block_height 2, block_width 4, input_stride 4, output_stride 7.
  TransposeMicrokernelTester tester;
  tester.input_stride(4).output_stride(7);
  tester.block_width(4).block_height(2);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
}
648
TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_2_2_bw_4) {
  // Block heights 2..2 (inclusive) with block_width 7 and input_stride 21;
  // output_stride equals the height. The bound used to be `i < 2`, which made
  // the loop empty and the test vacuous; it is inclusive so the case runs.
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester tester;
    tester.input_stride(21).output_stride(height);
    tester.block_width(7).block_height(height);
    tester.element_size(4).iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
  }
}
661
TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_2_2_bw_8) {
  // Block heights 2..2 (inclusive) with block_width 8 and input_stride 8;
  // output_stride equals the height. The bound used to be `i < 2`, which made
  // the loop empty and the test vacuous; it is inclusive so the case runs.
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester tester;
    tester.input_stride(8).output_stride(height);
    tester.block_width(8).block_height(height);
    tester.element_size(4).iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
  }
}
674
TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_2_2_bw_5_8) {
  // Block heights 2..2 (inclusive) crossed with widths in [5, 8); strides equal
  // the matching block dimension. The outer bound used to be `i < 2`, which
  // skipped the whole body; it is inclusive so the test exercises the kernel.
  for (size_t height = 2; height <= 2; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width).output_stride(height);
      tester.block_width(width).block_height(height);
      tester.element_size(4).iterations(1);
      tester.Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
    }
  }
}
689
TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_1_bw_4_is_8) {
  // input_stride 8 is larger than block_width 4; output_stride is 1.
  TransposeMicrokernelTester tester;
  tester.input_stride(8).output_stride(1);
  tester.block_width(4).block_height(1);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
}
700
TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_1_bw_4_os_2) {
  // output_stride 2 is larger than block_height 1; input_stride is 4.
  TransposeMicrokernelTester tester;
  tester.input_stride(4).output_stride(2);
  tester.block_width(4).block_height(1);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
}
711
TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_1_bw_4_is_8_os_2) {
  // Both strides exceed the block: input_stride 8 and output_stride 2.
  TransposeMicrokernelTester tester;
  tester.input_stride(8).output_stride(2);
  tester.block_width(4).block_height(1);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
}
722
TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_17_bw_76_ies_15) {
  // 17 x 76 block with input_element_stride 15.
  TransposeMicrokernelTester tester;
  tester.input_stride(76).output_stride(17);
  tester.block_width(76).block_height(17);
  tester.element_size(4).input_element_stride(15).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
}
734
TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_3_bw_20_oes_15) {
  // 3 x 20 block with output_element_stride 15.
  TransposeMicrokernelTester tester;
  tester.input_stride(20).output_stride(3);
  tester.block_width(20).block_height(3);
  tester.element_size(4).output_element_stride(15).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
}
746
TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_7_bw_92_ies_21_oes_17) {
  // 7 x 92 block, strides 97/13, input/output element strides 21/17.
  TransposeMicrokernelTester tester;
  tester.input_stride(97).output_stride(13);
  tester.block_width(92).block_height(7);
  tester.element_size(4).input_element_stride(21).output_element_stride(17);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
}
759
TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_2_bw_1) {
  // Single case: block_height 2, block_width 1, input_stride 2, output_stride 4.
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(4);
  tester.block_width(1).block_height(2);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
}
770
TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_1_4_bw_1_2) {
  // Sweep block heights 1..4 and block widths 1..2 (both inclusive), with
  // input_stride = 3 * width and output_stride = 7 * height.
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 2; ++width) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width * 3).output_stride(height * 7);
      tester.block_width(width).block_height(height);
      tester.element_size(4).iterations(1);
      tester.Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
    }
  }
}
785
TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_2_bw_2) {
  // Single case: block_height 2, block_width 2, input_stride 2, output_stride 2.
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(2);
  tester.block_width(2).block_height(2);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
}
796
TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_2_bw_2_2) {
  // Block widths 2..2 (inclusive) at block_height 2 with output_stride 4;
  // input_stride equals the width. The bound used to be `i < 2`, which made
  // the loop empty and the test vacuous; it is inclusive so the case runs.
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(4);
    tester.block_width(width).block_height(2);
    tester.element_size(4).iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
  }
}
809
TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_4_bw_2_2) {
  // Block widths 2..2 (inclusive) at block_height 4 with output_stride 4;
  // input_stride equals the width. The bound used to be `i < 2`, which made
  // the loop empty and the test vacuous; it is inclusive so the case runs.
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(4);
    tester.block_width(width).block_height(4);
    tester.element_size(4).iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
  }
}
822
TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_4_bw_1) {
  // Single case: block_height 4, block_width 1, input_stride 1, output_stride 10.
  TransposeMicrokernelTester tester;
  tester.input_stride(1).output_stride(10);
  tester.block_width(1).block_height(4);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
}
833
TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_3_4_bw_1) {
  // Heights in [3, 4) with block_width 4 and input_stride 18; output_stride
  // equals the height.
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester;
    tester.input_stride(18).output_stride(height);
    tester.block_width(4).block_height(height);
    tester.element_size(4).iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
  }
}
846
TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_3_4_bw_2) {
  // Heights in [3, 4) with block_width 2 and input_stride 2; output_stride
  // equals the height.
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester;
    tester.input_stride(2).output_stride(height);
    tester.block_width(2).block_height(height);
    tester.element_size(4).iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
  }
}
859
TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_3_4_bw_2_2) {
  // Heights in [3, 4) crossed with block widths 2..2 (inclusive); strides equal
  // the matching block dimension. The inner bound used to be `j < 2`, which
  // skipped the body on every pass; it is inclusive so the kernel is exercised.
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 2; width <= 2; ++width) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width).output_stride(height);
      tester.block_width(width).block_height(height);
      tester.element_size(4).iterations(1);
      tester.Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
    }
  }
}
874
TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_2_bw_1_is_2) {
  // input_stride 2 is larger than block_width 1; output_stride is 2.
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(2);
  tester.block_width(1).block_height(2);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
}
885
TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_2_bw_1_os_4) {
  // output_stride 4 is larger than block_height 2; input_stride is 1.
  TransposeMicrokernelTester tester;
  tester.input_stride(1).output_stride(4);
  tester.block_width(1).block_height(2);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
}
896
TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_2_bw_1_is_2_os_4) {
  // Both strides exceed the block: input_stride 2 and output_stride 4.
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(4);
  tester.block_width(1).block_height(2);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
}
907
TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_34_bw_19_ies_15) {
  // 34 x 19 block with input_element_stride 15.
  TransposeMicrokernelTester tester;
  tester.input_stride(19).output_stride(34);
  tester.block_width(19).block_height(34);
  tester.element_size(4).input_element_stride(15).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
}
919
TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_6_bw_5_oes_15) {
  // 6 x 5 block with output_element_stride 15.
  TransposeMicrokernelTester tester;
  tester.input_stride(5).output_stride(6);
  tester.block_width(5).block_height(6);
  tester.element_size(4).output_element_stride(15).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
}
931
TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_14_bw_23_ies_21_oes_17) {
  // 14 x 23 block, strides 28/20, input/output element strides 21/17.
  TransposeMicrokernelTester tester;
  tester.input_stride(28).output_stride(20);
  tester.block_width(23).block_height(14);
  tester.element_size(4).input_element_stride(21).output_element_stride(17);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
}
944
TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_2_bw_1) {
  // Single case: block_height 2, block_width 1, input_stride 2, output_stride 4.
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(4);
  tester.block_width(1).block_height(2);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x1_scalar_int);
}
955
TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_1_4_bw_1_2) {
  // Sweep block heights 1..4 and block widths 1..2 (both inclusive), with
  // input_stride = 3 * width and output_stride = 7 * height.
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 2; ++width) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width * 3).output_stride(height * 7);
      tester.block_width(width).block_height(height);
      tester.element_size(4).iterations(1);
      tester.Test(xnn_x32_transposec_ukernel__2x1_scalar_int);
    }
  }
}
970
TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_2_bw_2) {
  // Single case: block_height 2, block_width 2, input_stride 2, output_stride 2.
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(2);
  tester.block_width(2).block_height(2);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x1_scalar_int);
}
981
TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_2_bw_2_2) {
  // Block widths 2..2 (inclusive) at block_height 2 with output_stride 4;
  // input_stride equals the width. The bound used to be `i < 2`, which made
  // the loop empty and the test vacuous; it is inclusive so the case runs.
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(4);
    tester.block_width(width).block_height(2);
    tester.element_size(4).iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__2x1_scalar_int);
  }
}
994
TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_4_bw_2_2) {
  // Block widths 2..2 (inclusive) at block_height 4 with output_stride 4;
  // input_stride equals the width. The bound used to be `i < 2`, which made
  // the loop empty and the test vacuous; it is inclusive so the case runs.
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(4);
    tester.block_width(width).block_height(4);
    tester.element_size(4).iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__2x1_scalar_int);
  }
}
1007
TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_4_bw_1) {
  // Single case: block_height 4, block_width 1, input_stride 1, output_stride 10.
  TransposeMicrokernelTester tester;
  tester.input_stride(1).output_stride(10);
  tester.block_width(1).block_height(4);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x1_scalar_int);
}
1018
TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_3_4_bw_1) {
  // Heights in [3, 4) with block_width 4 and input_stride 18; output_stride
  // equals the height.
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester;
    tester.input_stride(18).output_stride(height);
    tester.block_width(4).block_height(height);
    tester.element_size(4).iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__2x1_scalar_int);
  }
}
1031
TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_3_4_bw_2) {
  // Heights in [3, 4) with block_width 2 and input_stride 2; output_stride
  // equals the height.
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester;
    tester.input_stride(2).output_stride(height);
    tester.block_width(2).block_height(height);
    tester.element_size(4).iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__2x1_scalar_int);
  }
}
1044
TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_3_4_bw_2_2) {
  // Heights in [3, 4) crossed with block widths 2..2 (inclusive); strides equal
  // the matching block dimension. The inner bound used to be `j < 2`, which
  // skipped the body on every pass; it is inclusive so the kernel is exercised.
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 2; width <= 2; ++width) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width).output_stride(height);
      tester.block_width(width).block_height(height);
      tester.element_size(4).iterations(1);
      tester.Test(xnn_x32_transposec_ukernel__2x1_scalar_int);
    }
  }
}
1059
TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_2_bw_1_is_2) {
  // input_stride 2 is larger than block_width 1; output_stride is 2.
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(2);
  tester.block_width(1).block_height(2);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x1_scalar_int);
}
1070
TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_2_bw_1_os_4) {
  // output_stride 4 is larger than block_height 2; input_stride is 1.
  TransposeMicrokernelTester tester;
  tester.input_stride(1).output_stride(4);
  tester.block_width(1).block_height(2);
  tester.element_size(4).iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x1_scalar_int);
}
1081
// Single call: block width 1, block height 2, with both strides larger than
// the block (input stride 2, output stride 4).
TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_2_bw_1_is_2_os_4) {
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(4)
    .block_width(1)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x1_scalar_int);
}
1092
// Single call on a 19-wide, 34-high block with strides equal to the block
// dimensions and an explicit input element stride of 15.
TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_34_bw_19_ies_15) {
  TransposeMicrokernelTester()
    .input_stride(19)
    .output_stride(34)
    .block_width(19)
    .block_height(34)
    .element_size(4)
    .input_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x1_scalar_int);
}
1104
// Single call on a 5-wide, 6-high block with strides equal to the block
// dimensions and an explicit output element stride of 15.
TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_6_bw_5_oes_15) {
  TransposeMicrokernelTester()
    .input_stride(5)
    .output_stride(6)
    .block_width(5)
    .block_height(6)
    .element_size(4)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x1_scalar_int);
}
1116
// Single call on a 23-wide, 14-high block with strides larger than the block
// (input 28, output 20) and explicit element strides (input 21, output 17).
TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_14_bw_23_ies_21_oes_17) {
  TransposeMicrokernelTester()
    .input_stride(28)
    .output_stride(20)
    .block_width(23)
    .block_height(14)
    .element_size(4)
    .input_element_stride(21)
    .output_element_stride(17)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x1_scalar_int);
}
1129
// Single call of the 2x2 scalar float kernel: block width 2, block height 2,
// input stride 4, output stride 4.
TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_2_bw_2) {
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(4)
    .block_width(2)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x2_scalar_float);
}
1140
// Sweeps every block height in 1..4 against every block width in 1..4
// (inclusive), using input stride 3*width and output stride 7*height.
TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_1_4_bw_1_4) {
  for (size_t bh = 1; bh <= 4; ++bh) {
    for (size_t bw = 1; bw <= 4; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw * 3)
        .output_stride(bh * 7)
        .block_width(bw)
        .block_height(bh)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_scalar_float);
    }
  }
}
1155
// Single call: block width 4, block height 2, input stride 4, output stride 2.
TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_2_bw_4) {
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(2)
    .block_width(4)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x2_scalar_float);
}
1166
// Block width ranges over [3, 4) (only 3) with a fixed block height of 2;
// input stride equals the width, output stride is 4.
TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_2_bw_3_4) {
  for (size_t bw = 3; bw < 4; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(4)
      .block_width(bw)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_scalar_float);
  }
}
1179
// Block width ranges over [3, 4) (only 3) with a fixed block height of 4;
// input stride equals the width, output stride is 4.
TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_4_bw_3_4) {
  for (size_t bw = 3; bw < 4; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(4)
      .block_width(bw)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_scalar_float);
  }
}
1192
// Single call: block width 2, block height 4, input stride 2, output stride 10.
TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_4_bw_2) {
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(10)
    .block_width(2)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x2_scalar_float);
}
1203
// Block height ranges over [3, 4), so only height 3 is exercised; the output
// stride tracks the height.
// NOTE(review): the test name says bw_2, but block_width is 5 and input_stride
// is 19 -- confirm which is intended.
TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_3_4_bw_2) {
  for (size_t bh = 3; bh < 4; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(19)
      .output_stride(bh)
      .block_width(5)
      .block_height(bh)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_scalar_float);
  }
}
1216
// Block height ranges over [3, 4) (only 3) with a fixed block width of 4;
// input stride equals the width and the output stride tracks the height.
TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_3_4_bw_4) {
  for (size_t bh = 3; bh < 4; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(bh)
      .block_width(4)
      .block_height(bh)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_scalar_float);
  }
}
1229
// Nested sweep: block height over [3, 4) and block width over [3, 4), i.e. the
// single 3x3 case; strides equal the corresponding block dimension.
TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_3_4_bw_3_4) {
  for (size_t bh = 3; bh < 4; ++bh) {
    for (size_t bw = 3; bw < 4; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_scalar_float);
    }
  }
}
1244
// Single call: 2x2 block with an input stride (4) larger than the block width;
// output stride 2.
TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_2_bw_2_is_4) {
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(2)
    .block_width(2)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x2_scalar_float);
}
1255
// Single call: 2x2 block with an output stride (4) larger than the block
// height; input stride 2.
TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_2_bw_2_os_4) {
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(4)
    .block_width(2)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x2_scalar_float);
}
1266
// Single call: 2x2 block with both strides (4) larger than the block.
TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_2_bw_2_is_4_os_4) {
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(4)
    .block_width(2)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x2_scalar_float);
}
1277
// Single call on a 38-wide, 34-high block with strides equal to the block
// dimensions and an explicit input element stride of 15.
TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_34_bw_38_ies_15) {
  TransposeMicrokernelTester()
    .input_stride(38)
    .output_stride(34)
    .block_width(38)
    .block_height(34)
    .element_size(4)
    .input_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x2_scalar_float);
}
1289
// Single call on a 10-wide, 6-high block with strides equal to the block
// dimensions and an explicit output element stride of 15.
TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_6_bw_10_oes_15) {
  TransposeMicrokernelTester()
    .input_stride(10)
    .output_stride(6)
    .block_width(10)
    .block_height(6)
    .element_size(4)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x2_scalar_float);
}
1301
// Single call on a 46-wide, 14-high block with strides larger than the block
// (input 51, output 20) and explicit element strides (input 21, output 17).
TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_14_bw_46_ies_21_oes_17) {
  TransposeMicrokernelTester()
    .input_stride(51)
    .output_stride(20)
    .block_width(46)
    .block_height(14)
    .element_size(4)
    .input_element_stride(21)
    .output_element_stride(17)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x2_scalar_float);
}
1314
// Single call of the 2x2 scalar int kernel: block width 2, block height 2,
// input stride 4, output stride 4.
TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_2_bw_2) {
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(4)
    .block_width(2)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x2_scalar_int);
}
1325
// Sweeps every block height in 1..4 against every block width in 1..4
// (inclusive), using input stride 3*width and output stride 7*height.
TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_1_4_bw_1_4) {
  for (size_t bh = 1; bh <= 4; ++bh) {
    for (size_t bw = 1; bw <= 4; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw * 3)
        .output_stride(bh * 7)
        .block_width(bw)
        .block_height(bh)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_scalar_int);
    }
  }
}
1340
// Single call: block width 4, block height 2, input stride 4, output stride 2.
TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_2_bw_4) {
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(2)
    .block_width(4)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x2_scalar_int);
}
1351
// Block width ranges over [3, 4) (only 3) with a fixed block height of 2;
// input stride equals the width, output stride is 4.
TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_2_bw_3_4) {
  for (size_t bw = 3; bw < 4; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(4)
      .block_width(bw)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_scalar_int);
  }
}
1364
// Block width ranges over [3, 4) (only 3) with a fixed block height of 4;
// input stride equals the width, output stride is 4.
TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_4_bw_3_4) {
  for (size_t bw = 3; bw < 4; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(4)
      .block_width(bw)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_scalar_int);
  }
}
1377
// Single call: block width 2, block height 4, input stride 2, output stride 10.
TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_4_bw_2) {
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(10)
    .block_width(2)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x2_scalar_int);
}
1388
// Block height ranges over [3, 4), so only height 3 is exercised; the output
// stride tracks the height.
// NOTE(review): the test name says bw_2, but block_width is 5 and input_stride
// is 19 -- confirm which is intended.
TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_3_4_bw_2) {
  for (size_t bh = 3; bh < 4; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(19)
      .output_stride(bh)
      .block_width(5)
      .block_height(bh)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_scalar_int);
  }
}
1401
// Block height ranges over [3, 4) (only 3) with a fixed block width of 4;
// input stride equals the width and the output stride tracks the height.
TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_3_4_bw_4) {
  for (size_t bh = 3; bh < 4; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(bh)
      .block_width(4)
      .block_height(bh)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_scalar_int);
  }
}
1414
// Nested sweep: block height over [3, 4) and block width over [3, 4), i.e. the
// single 3x3 case; strides equal the corresponding block dimension.
TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_3_4_bw_3_4) {
  for (size_t bh = 3; bh < 4; ++bh) {
    for (size_t bw = 3; bw < 4; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_scalar_int);
    }
  }
}
1429
// Single call: 2x2 block with an input stride (4) larger than the block width;
// output stride 2.
TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_2_bw_2_is_4) {
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(2)
    .block_width(2)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x2_scalar_int);
}
1440
// Single call: 2x2 block with an output stride (4) larger than the block
// height; input stride 2.
TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_2_bw_2_os_4) {
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(4)
    .block_width(2)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x2_scalar_int);
}
1451
// Single call: 2x2 block with both strides (4) larger than the block.
TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_2_bw_2_is_4_os_4) {
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(4)
    .block_width(2)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x2_scalar_int);
}
1462
// Single call on a 38-wide, 34-high block with strides equal to the block
// dimensions and an explicit input element stride of 15.
TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_34_bw_38_ies_15) {
  TransposeMicrokernelTester()
    .input_stride(38)
    .output_stride(34)
    .block_width(38)
    .block_height(34)
    .element_size(4)
    .input_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x2_scalar_int);
}
1474
// Single call on a 10-wide, 6-high block with strides equal to the block
// dimensions and an explicit output element stride of 15.
TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_6_bw_10_oes_15) {
  TransposeMicrokernelTester()
    .input_stride(10)
    .output_stride(6)
    .block_width(10)
    .block_height(6)
    .element_size(4)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x2_scalar_int);
}
1486
// Single call on a 46-wide, 14-high block with strides larger than the block
// (input 51, output 20) and explicit element strides (input 21, output 17).
TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_14_bw_46_ies_21_oes_17) {
  TransposeMicrokernelTester()
    .input_stride(51)
    .output_stride(20)
    .block_width(46)
    .block_height(14)
    .element_size(4)
    .input_element_stride(21)
    .output_element_stride(17)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x2_scalar_int);
}
1499
// Single call of the 2x4 scalar float kernel: block width 4, block height 2,
// input stride 8, output stride 4.
TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_2_bw_4) {
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(4)
    .block_width(4)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x4_scalar_float);
}
1510
// Sweeps every block height in 1..4 against every block width in 1..8
// (inclusive), using input stride 3*width and output stride 7*height.
TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_1_4_bw_1_8) {
  for (size_t bh = 1; bh <= 4; ++bh) {
    for (size_t bw = 1; bw <= 8; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw * 3)
        .output_stride(bh * 7)
        .block_width(bw)
        .block_height(bh)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x4_scalar_float);
    }
  }
}
1525
// Single call: block width 8, block height 2, input stride 8, output stride 2.
TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_2_bw_8) {
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(2)
    .block_width(8)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x4_scalar_float);
}
1536
// Block width ranges over [5, 8) (5, 6 and 7) with a fixed block height of 2;
// input stride equals the width, output stride is 4.
TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_2_bw_5_8) {
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(4)
      .block_width(bw)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x4_scalar_float);
  }
}
1549
// Block width ranges over [5, 8) (5, 6 and 7) with a fixed block height of 4;
// input stride equals the width, output stride is 4.
TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_4_bw_5_8) {
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(4)
      .block_width(bw)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x4_scalar_float);
  }
}
1562
// Single call: block width 4, block height 4, input stride 4, output stride 10.
TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_4_bw_4) {
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(10)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x4_scalar_float);
}
1573
// Block height ranges over [3, 4), so only height 3 is exercised; the output
// stride tracks the height.
// NOTE(review): the test name says bw_4, but block_width is 7 and input_stride
// is 21 -- confirm which is intended.
TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_3_4_bw_4) {
  for (size_t bh = 3; bh < 4; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(21)
      .output_stride(bh)
      .block_width(7)
      .block_height(bh)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x4_scalar_float);
  }
}
1586
// Block height ranges over [3, 4) (only 3) with a fixed block width of 8;
// input stride equals the width and the output stride tracks the height.
TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_3_4_bw_8) {
  for (size_t bh = 3; bh < 4; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(bh)
      .block_width(8)
      .block_height(bh)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x4_scalar_float);
  }
}
1599
// Nested sweep: block height over [3, 4) (only 3) and block width over [5, 8)
// (5, 6 and 7); strides equal the corresponding block dimension.
TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_3_4_bw_5_8) {
  for (size_t bh = 3; bh < 4; ++bh) {
    for (size_t bw = 5; bw < 8; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x4_scalar_float);
    }
  }
}
1614
// Single call: block width 4, block height 2, with an input stride (8) larger
// than the block width; output stride 2.
TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_2_bw_4_is_8) {
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(2)
    .block_width(4)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x4_scalar_float);
}
1625
// Single call: block width 4, block height 2, with an output stride (4) larger
// than the block height; input stride 4.
TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_2_bw_4_os_4) {
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(4)
    .block_width(4)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x4_scalar_float);
}
1636
// Single call: block width 4, block height 2, with both strides larger than
// the block (input stride 8, output stride 4).
TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_2_bw_4_is_8_os_4) {
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(4)
    .block_width(4)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x4_scalar_float);
}
1647
// Single call on a 76-wide, 34-high block with strides equal to the block
// dimensions and an explicit input element stride of 15.
TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_34_bw_76_ies_15) {
  TransposeMicrokernelTester()
    .input_stride(76)
    .output_stride(34)
    .block_width(76)
    .block_height(34)
    .element_size(4)
    .input_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x4_scalar_float);
}
1659
// Single call on a 20-wide, 6-high block with strides equal to the block
// dimensions and an explicit output element stride of 15.
TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_6_bw_20_oes_15) {
  TransposeMicrokernelTester()
    .input_stride(20)
    .output_stride(6)
    .block_width(20)
    .block_height(6)
    .element_size(4)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x4_scalar_float);
}
1671
// Single call on a 92-wide, 14-high block with strides larger than the block
// (input 97, output 20) and explicit element strides (input 21, output 17).
TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_14_bw_92_ies_21_oes_17) {
  TransposeMicrokernelTester()
    .input_stride(97)
    .output_stride(20)
    .block_width(92)
    .block_height(14)
    .element_size(4)
    .input_element_stride(21)
    .output_element_stride(17)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x4_scalar_float);
}
1684
// Single call of the 2x4 scalar int kernel: block width 4, block height 2,
// input stride 8, output stride 4.
TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_2_bw_4) {
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(4)
    .block_width(4)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x4_scalar_int);
}
1695
// Sweeps every block height in 1..4 against every block width in 1..8
// (inclusive), using input stride 3*width and output stride 7*height.
TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_1_4_bw_1_8) {
  for (size_t bh = 1; bh <= 4; ++bh) {
    for (size_t bw = 1; bw <= 8; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw * 3)
        .output_stride(bh * 7)
        .block_width(bw)
        .block_height(bh)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x4_scalar_int);
    }
  }
}
1710
// Single call: block width 8, block height 2, input stride 8, output stride 2.
TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_2_bw_8) {
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(2)
    .block_width(8)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x4_scalar_int);
}
1721
// Block width ranges over [5, 8) (5, 6 and 7) with a fixed block height of 2;
// input stride equals the width, output stride is 4.
TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_2_bw_5_8) {
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(4)
      .block_width(bw)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x4_scalar_int);
  }
}
1734
// Block width ranges over [5, 8) (5, 6 and 7) with a fixed block height of 4;
// input stride equals the width, output stride is 4.
TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_4_bw_5_8) {
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(4)
      .block_width(bw)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x4_scalar_int);
  }
}
1747
// Single call: block width 4, block height 4, input stride 4, output stride 10.
TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_4_bw_4) {
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(10)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x4_scalar_int);
}
1758
// Block height ranges over [3, 4), so only height 3 is exercised; the output
// stride tracks the height.
// NOTE(review): the test name says bw_4, but block_width is 7 and input_stride
// is 21 -- confirm which is intended.
TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_3_4_bw_4) {
  for (size_t bh = 3; bh < 4; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(21)
      .output_stride(bh)
      .block_width(7)
      .block_height(bh)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x4_scalar_int);
  }
}
1771
// Block height ranges over [3, 4) (only 3) with a fixed block width of 8;
// input stride equals the width and the output stride tracks the height.
TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_3_4_bw_8) {
  for (size_t bh = 3; bh < 4; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(bh)
      .block_width(8)
      .block_height(bh)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x4_scalar_int);
  }
}
1784
// Nested sweep: block height over [3, 4) (only 3) and block width over [5, 8)
// (5, 6 and 7); strides equal the corresponding block dimension.
TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_3_4_bw_5_8) {
  for (size_t bh = 3; bh < 4; ++bh) {
    for (size_t bw = 5; bw < 8; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x4_scalar_int);
    }
  }
}
1799
// Single call: block width 4, block height 2, with an input stride (8) larger
// than the block width; output stride 2.
TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_2_bw_4_is_8) {
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(2)
    .block_width(4)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x4_scalar_int);
}
1810
// Single call: block width 4, block height 2, with an output stride (4) larger
// than the block height; input stride 4.
TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_2_bw_4_os_4) {
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(4)
    .block_width(4)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x4_scalar_int);
}
1821
// Single call: block width 4, block height 2, with both strides larger than
// the block (input stride 8, output stride 4).
TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_2_bw_4_is_8_os_4) {
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(4)
    .block_width(4)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x4_scalar_int);
}
1832
// Single call on a 76-wide, 34-high block with strides equal to the block
// dimensions and an explicit input element stride of 15.
TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_34_bw_76_ies_15) {
  TransposeMicrokernelTester()
    .input_stride(76)
    .output_stride(34)
    .block_width(76)
    .block_height(34)
    .element_size(4)
    .input_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x4_scalar_int);
}
1844
// Single call on a 20-wide, 6-high block with strides equal to the block
// dimensions and an explicit output element stride of 15.
TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_6_bw_20_oes_15) {
  TransposeMicrokernelTester()
    .input_stride(20)
    .output_stride(6)
    .block_width(20)
    .block_height(6)
    .element_size(4)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x4_scalar_int);
}
1856
// Single call on a 92-wide, 14-high block with strides larger than the block
// (input 97, output 20) and explicit element strides (input 21, output 17).
TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_14_bw_92_ies_21_oes_17) {
  TransposeMicrokernelTester()
    .input_stride(97)
    .output_stride(20)
    .block_width(92)
    .block_height(14)
    .element_size(4)
    .input_element_stride(21)
    .output_element_stride(17)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x4_scalar_int);
}
1869
// Single call of the 4x1 scalar float kernel: block width 1, block height 4,
// input stride 2, output stride 8.
TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_4_bw_1) {
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(8)
    .block_width(1)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
}
1880
// Sweeps every block height in 1..8 against every block width in 1..2
// (inclusive), using input stride 3*width and output stride 7*height.
TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_1_8_bw_1_2) {
  for (size_t bh = 1; bh <= 8; ++bh) {
    for (size_t bw = 1; bw <= 2; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw * 3)
        .output_stride(bh * 7)
        .block_width(bw)
        .block_height(bh)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
    }
  }
}
1895
// Single call: block width 2, block height 4, input stride 2, output stride 4.
TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_4_bw_2) {
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(4)
    .block_width(2)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
}
1906
// Block width sweep over [2, 2) with a fixed block height of 4; input stride
// equals the width, output stride is 8.
// NOTE(review): the range [2, 2) is empty, so the tester is never invoked and
// this test passes without exercising the kernel.
TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_4_bw_2_2) {
  for (size_t bw = 2; bw < 2; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(8)
      .block_width(bw)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
  }
}
1919
// Block width sweep over [2, 2) with a fixed block height of 8; input stride
// equals the width, output stride is 8.
// NOTE(review): the range [2, 2) is empty, so the tester is never invoked and
// this test passes without exercising the kernel.
TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_8_bw_2_2) {
  for (size_t bw = 2; bw < 2; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(8)
      .block_width(bw)
      .block_height(8)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
  }
}
1932
// Single call: block width 1, block height 8, input stride 1, output stride 16.
TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_8_bw_1) {
  TransposeMicrokernelTester()
    .input_stride(1)
    .output_stride(16)
    .block_width(1)
    .block_height(8)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
}
1943
// Block height ranges over [5, 8) (5, 6 and 7); the output stride tracks the
// height.
// NOTE(review): the test name says bw_1, but block_width is 4 and input_stride
// is 18 -- confirm which is intended.
TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_5_8_bw_1) {
  for (size_t bh = 5; bh < 8; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(18)
      .output_stride(bh)
      .block_width(4)
      .block_height(bh)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
  }
}
1956
// Block height ranges over [5, 8) (5, 6 and 7) with a fixed block width of 2;
// input stride equals the width and the output stride tracks the height.
TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_5_8_bw_2) {
  for (size_t bh = 5; bh < 8; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(bh)
      .block_width(2)
      .block_height(bh)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
  }
}
1969
// Nested sweep: block height over [5, 8), block width over [2, 2); strides
// equal the corresponding block dimension.
// NOTE(review): the width range [2, 2) is empty, so the tester is never
// invoked and this test passes without exercising the kernel.
TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_5_8_bw_2_2) {
  for (size_t bh = 5; bh < 8; ++bh) {
    for (size_t bw = 2; bw < 2; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
    }
  }
}
1984
// Single call: block width 1, block height 4, with an input stride (2) larger
// than the block width; output stride 4.
TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_4_bw_1_is_2) {
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(4)
    .block_width(1)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
}
1995
// Single call: block width 1, block height 4, with an output stride (8) larger
// than the block height; input stride 1.
TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_4_bw_1_os_8) {
  TransposeMicrokernelTester()
    .input_stride(1)
    .output_stride(8)
    .block_width(1)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
}
2006
// Single call: block width 1, block height 4, with both strides larger than
// the block (input stride 2, output stride 8).
TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_4_bw_1_is_2_os_8) {
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(8)
    .block_width(1)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
}
2017
// Single call on a 19-wide, 68-high block with strides equal to the block
// dimensions and an explicit input element stride of 15.
TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_68_bw_19_ies_15) {
  TransposeMicrokernelTester()
    .input_stride(19)
    .output_stride(68)
    .block_width(19)
    .block_height(68)
    .element_size(4)
    .input_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
}
2029
// Single call on a 5-wide, 12-high block with strides equal to the block
// dimensions and an explicit output element stride of 15.
TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_12_bw_5_oes_15) {
  TransposeMicrokernelTester()
    .input_stride(5)
    .output_stride(12)
    .block_width(5)
    .block_height(12)
    .element_size(4)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
}
2041
// Single call on a 23-wide, 28-high block with strides larger than the block
// (input 28, output 34) and explicit element strides (input 21, output 17).
TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_28_bw_23_ies_21_oes_17) {
  TransposeMicrokernelTester()
    .input_stride(28)
    .output_stride(34)
    .block_width(23)
    .block_height(28)
    .element_size(4)
    .input_element_stride(21)
    .output_element_stride(17)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
}
2054
// Single call of the 4x1 scalar int kernel: block width 1, block height 4,
// input stride 2, output stride 8.
TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_4_bw_1) {
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(8)
    .block_width(1)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
}
2065
// Sweeps every block height in 1..8 against every block width in 1..2
// (inclusive), using input stride 3*width and output stride 7*height.
TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_1_8_bw_1_2) {
  for (size_t bh = 1; bh <= 8; ++bh) {
    for (size_t bw = 1; bw <= 2; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw * 3)
        .output_stride(bh * 7)
        .block_width(bw)
        .block_height(bh)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
    }
  }
}
2080
// Single call: block width 2, block height 4, input stride 2, output stride 4.
TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_4_bw_2) {
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(4)
    .block_width(2)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
}
2091
// Block width sweep over [2, 2) with a fixed block height of 4; input stride
// equals the width, output stride is 8.
// NOTE(review): the range [2, 2) is empty, so the tester is never invoked and
// this test passes without exercising the kernel.
TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_4_bw_2_2) {
  for (size_t bw = 2; bw < 2; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(8)
      .block_width(bw)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
  }
}
2104
// Block width sweep over [2, 2) with a fixed block height of 8; input stride
// equals the width, output stride is 8.
// NOTE(review): the range [2, 2) is empty, so the tester is never invoked and
// this test passes without exercising the kernel.
TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_8_bw_2_2) {
  for (size_t bw = 2; bw < 2; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(8)
      .block_width(bw)
      .block_height(8)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
  }
}
2117
TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_8_bw_1) {
  // Single case: block_height 8, block_width 1, input_stride 1, output_stride 16.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(1).output_stride(16)
      .block_width(1).block_height(8)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
}
2128
TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_5_8_bw_1) {
  // Sweeps block_height over [5, 8) with output_stride equal to the height.
  // NOTE(review): the case name says bw_1, but block_width is 4 (with
  // input_stride 18); presumably intended by the generator - confirm.
  for (size_t height = 5; height < 8; ++height) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(18).output_stride(height)
        .block_width(4).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
  }
}
2141
TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_5_8_bw_2) {
  // Sweeps block_height over [5, 8) with output_stride equal to the height;
  // block_width and input_stride are both fixed at 2.
  for (size_t height = 5; height < 8; ++height) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(2).output_stride(height)
        .block_width(2).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
  }
}
2154
TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_5_8_bw_2_2) {
  // Sweeps block_height over [5, 8) and block_width over [2, 2); each stride
  // equals the corresponding block dimension.
  // The width range [2, 2) is empty, so the tester is never invoked here.
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 2; width < 2; ++width) {
      auto tester = TransposeMicrokernelTester();
      tester.input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
    }
  }
}
2169
TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_4_bw_1_is_2) {
  // Single case: input_stride 2 exceeds block_width 1; output_stride 4
  // matches block_height 4.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(2).output_stride(4)
      .block_width(1).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
}
2180
TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_4_bw_1_os_8) {
  // Single case: output_stride 8 exceeds block_height 4; input_stride 1
  // matches block_width 1.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(1).output_stride(8)
      .block_width(1).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
}
2191
TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_4_bw_1_is_2_os_8) {
  // Single case: both strides exceed the block dimensions (input_stride 2 vs
  // block_width 1, output_stride 8 vs block_height 4).
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(2).output_stride(8)
      .block_width(1).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
}
2202
TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_68_bw_19_ies_15) {
  // Single case: block_height 68, block_width 19, with input_element_stride 15
  // set explicitly (element_size 4).
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(19).output_stride(68)
      .block_width(19).block_height(68)
      .element_size(4).input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
}
2214
TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_12_bw_5_oes_15) {
  // Single case: block_height 12, block_width 5, with output_element_stride 15
  // set explicitly (element_size 4).
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(5).output_stride(12)
      .block_width(5).block_height(12)
      .element_size(4).output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
}
2226
TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_28_bw_23_ies_21_oes_17) {
  // Single case: block_height 28, block_width 23, with both element strides
  // (input 21, output 17) set explicitly alongside element_size 4.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(28).output_stride(34)
      .block_width(23).block_height(28)
      .element_size(4).input_element_stride(21).output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
}
2239
TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_4_bw_2) {
  // Single case: block_height 4, block_width 2, input_stride 4, output_stride 8.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(4).output_stride(8)
      .block_width(2).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
}
2250
TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_1_8_bw_1_4) {
  // Sweeps block_height 1..8 and block_width 1..4 (both inclusive);
  // input_stride is 3x the width and output_stride is 7x the height.
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      auto tester = TransposeMicrokernelTester();
      tester.input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
    }
  }
}
2265
TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_4_bw_4) {
  // Single case: block_height 4, block_width 4, input_stride 4, output_stride 4.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(4).output_stride(4)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
}
2276
TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_4_bw_3_4) {
  // Sweeps block_width over [3, 4) - i.e. only width 3 - with input_stride
  // equal to the width; block_height 4, output_stride 8.
  for (size_t width = 3; width < 4; ++width) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(width).output_stride(8)
        .block_width(width).block_height(4)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
  }
}
2289
TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_8_bw_3_4) {
  // Sweeps block_width over [3, 4) - i.e. only width 3 - with input_stride
  // equal to the width; block_height 8, output_stride 8.
  for (size_t width = 3; width < 4; ++width) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(width).output_stride(8)
        .block_width(width).block_height(8)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
  }
}
2302
TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_8_bw_2) {
  // Single case: block_height 8, block_width 2, input_stride 2, output_stride 16.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(2).output_stride(16)
      .block_width(2).block_height(8)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
}
2313
TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_5_8_bw_2) {
  // Sweeps block_height over [5, 8) with output_stride equal to the height.
  // NOTE(review): the case name says bw_2, but block_width is 5 (with
  // input_stride 19); presumably intended by the generator - confirm.
  for (size_t height = 5; height < 8; ++height) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(19).output_stride(height)
        .block_width(5).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
  }
}
2326
TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_5_8_bw_4) {
  // Sweeps block_height over [5, 8) with output_stride equal to the height;
  // block_width and input_stride are both fixed at 4.
  for (size_t height = 5; height < 8; ++height) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(4).output_stride(height)
        .block_width(4).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
  }
}
2339
TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_5_8_bw_3_4) {
  // Sweeps block_height over [5, 8) and block_width over [3, 4) (only 3);
  // each stride equals the corresponding block dimension.
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      auto tester = TransposeMicrokernelTester();
      tester.input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
    }
  }
}
2354
TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_4_bw_2_is_4) {
  // Single case: input_stride 4 exceeds block_width 2; output_stride 4
  // matches block_height 4.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(4).output_stride(4)
      .block_width(2).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
}
2365
TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_4_bw_2_os_8) {
  // Single case: output_stride 8 exceeds block_height 4; input_stride 2
  // matches block_width 2.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(2).output_stride(8)
      .block_width(2).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
}
2376
TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_4_bw_2_is_4_os_8) {
  // Single case: both strides exceed the block dimensions (input_stride 4 vs
  // block_width 2, output_stride 8 vs block_height 4).
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(4).output_stride(8)
      .block_width(2).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
}
2387
TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_68_bw_38_ies_15) {
  // Single case: block_height 68, block_width 38, with input_element_stride 15
  // set explicitly (element_size 4).
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(38).output_stride(68)
      .block_width(38).block_height(68)
      .element_size(4).input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
}
2399
TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_12_bw_10_oes_15) {
  // Single case: block_height 12, block_width 10, with output_element_stride 15
  // set explicitly (element_size 4).
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(10).output_stride(12)
      .block_width(10).block_height(12)
      .element_size(4).output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
}
2411
TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_28_bw_46_ies_21_oes_17) {
  // Single case: block_height 28, block_width 46, with both element strides
  // (input 21, output 17) set explicitly alongside element_size 4.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(51).output_stride(34)
      .block_width(46).block_height(28)
      .element_size(4).input_element_stride(21).output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
}
2424
TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_4_bw_2) {
  // Single case: block_height 4, block_width 2, input_stride 4, output_stride 8.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(4).output_stride(8)
      .block_width(2).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
}
2435
TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_1_8_bw_1_4) {
  // Sweeps block_height 1..8 and block_width 1..4 (both inclusive);
  // input_stride is 3x the width and output_stride is 7x the height.
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      auto tester = TransposeMicrokernelTester();
      tester.input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
    }
  }
}
2450
TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_4_bw_4) {
  // Single case: block_height 4, block_width 4, input_stride 4, output_stride 4.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(4).output_stride(4)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
}
2461
TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_4_bw_3_4) {
  // Sweeps block_width over [3, 4) - i.e. only width 3 - with input_stride
  // equal to the width; block_height 4, output_stride 8.
  for (size_t width = 3; width < 4; ++width) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(width).output_stride(8)
        .block_width(width).block_height(4)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
  }
}
2474
TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_8_bw_3_4) {
  // Sweeps block_width over [3, 4) - i.e. only width 3 - with input_stride
  // equal to the width; block_height 8, output_stride 8.
  for (size_t width = 3; width < 4; ++width) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(width).output_stride(8)
        .block_width(width).block_height(8)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
  }
}
2487
TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_8_bw_2) {
  // Single case: block_height 8, block_width 2, input_stride 2, output_stride 16.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(2).output_stride(16)
      .block_width(2).block_height(8)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
}
2498
TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_5_8_bw_2) {
  // Sweeps block_height over [5, 8) with output_stride equal to the height.
  // NOTE(review): the case name says bw_2, but block_width is 5 (with
  // input_stride 19); presumably intended by the generator - confirm.
  for (size_t height = 5; height < 8; ++height) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(19).output_stride(height)
        .block_width(5).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
  }
}
2511
TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_5_8_bw_4) {
  // Sweeps block_height over [5, 8) with output_stride equal to the height;
  // block_width and input_stride are both fixed at 4.
  for (size_t height = 5; height < 8; ++height) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(4).output_stride(height)
        .block_width(4).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
  }
}
2524
TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_5_8_bw_3_4) {
  // Sweeps block_height over [5, 8) and block_width over [3, 4) (only 3);
  // each stride equals the corresponding block dimension.
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      auto tester = TransposeMicrokernelTester();
      tester.input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
    }
  }
}
2539
TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_4_bw_2_is_4) {
  // Single case: input_stride 4 exceeds block_width 2; output_stride 4
  // matches block_height 4.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(4).output_stride(4)
      .block_width(2).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
}
2550
TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_4_bw_2_os_8) {
  // Single case: output_stride 8 exceeds block_height 4; input_stride 2
  // matches block_width 2.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(2).output_stride(8)
      .block_width(2).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
}
2561
TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_4_bw_2_is_4_os_8) {
  // Single case: both strides exceed the block dimensions (input_stride 4 vs
  // block_width 2, output_stride 8 vs block_height 4).
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(4).output_stride(8)
      .block_width(2).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
}
2572
TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_68_bw_38_ies_15) {
  // Single case: block_height 68, block_width 38, with input_element_stride 15
  // set explicitly (element_size 4).
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(38).output_stride(68)
      .block_width(38).block_height(68)
      .element_size(4).input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
}
2584
TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_12_bw_10_oes_15) {
  // Single case: block_height 12, block_width 10, with output_element_stride 15
  // set explicitly (element_size 4).
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(10).output_stride(12)
      .block_width(10).block_height(12)
      .element_size(4).output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
}
2596
TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_28_bw_46_ies_21_oes_17) {
  // Single case: block_height 28, block_width 46, with both element strides
  // (input 21, output 17) set explicitly alongside element_size 4.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(51).output_stride(34)
      .block_width(46).block_height(28)
      .element_size(4).input_element_stride(21).output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
}
2609
TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_4_bw_4) {
  // Single case: block_height 4, block_width 4, input_stride 8, output_stride 8.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
}
2620
TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_1_8_bw_1_8) {
  // Sweeps block_height 1..8 and block_width 1..8 (both inclusive);
  // input_stride is 3x the width and output_stride is 7x the height.
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      auto tester = TransposeMicrokernelTester();
      tester.input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
    }
  }
}
2635
TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_4_bw_8) {
  // Single case: block_height 4, block_width 8, input_stride 8, output_stride 4.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(8).output_stride(4)
      .block_width(8).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
}
2646
TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_4_bw_5_8) {
  // Sweeps block_width over [5, 8) with input_stride equal to the width;
  // block_height 4, output_stride 8.
  for (size_t width = 5; width < 8; ++width) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(width).output_stride(8)
        .block_width(width).block_height(4)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
  }
}
2659
TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_8_bw_5_8) {
  // Sweeps block_width over [5, 8) with input_stride equal to the width;
  // block_height 8, output_stride 8.
  for (size_t width = 5; width < 8; ++width) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(width).output_stride(8)
        .block_width(width).block_height(8)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
  }
}
2672
TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_8_bw_4) {
  // Single case: block_height 8, block_width 4, input_stride 4, output_stride 16.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(4).output_stride(16)
      .block_width(4).block_height(8)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
}
2683
TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_5_8_bw_4) {
  // Sweeps block_height over [5, 8) with output_stride equal to the height.
  // NOTE(review): the case name says bw_4, but block_width is 7 (with
  // input_stride 21); presumably intended by the generator - confirm.
  for (size_t height = 5; height < 8; ++height) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(21).output_stride(height)
        .block_width(7).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
  }
}
2696
TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_5_8_bw_8) {
  // Sweeps block_height over [5, 8) with output_stride equal to the height;
  // block_width and input_stride are both fixed at 8.
  for (size_t height = 5; height < 8; ++height) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(8).output_stride(height)
        .block_width(8).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
  }
}
2709
TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_5_8_bw_5_8) {
  // Sweeps block_height and block_width independently over [5, 8);
  // each stride equals the corresponding block dimension.
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      auto tester = TransposeMicrokernelTester();
      tester.input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
    }
  }
}
2724
TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_4_bw_4_is_8) {
  // Single case: input_stride 8 exceeds block_width 4; output_stride 4
  // matches block_height 4.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(8).output_stride(4)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
}
2735
TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_4_bw_4_os_8) {
  // Single case: output_stride 8 exceeds block_height 4; input_stride 4
  // matches block_width 4.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(4).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
}
2746
TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_4_bw_4_is_8_os_8) {
  // Single case: both strides (8) exceed the block dimensions
  // (block_width 4, block_height 4).
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
}
2757
TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_68_bw_76_ies_15) {
  // Single case: block_height 68, block_width 76, with input_element_stride 15
  // set explicitly (element_size 4).
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(76).output_stride(68)
      .block_width(76).block_height(68)
      .element_size(4).input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
}
2769
TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_12_bw_20_oes_15) {
  // Single case: block_height 12, block_width 20, with output_element_stride 15
  // set explicitly (element_size 4).
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(20).output_stride(12)
      .block_width(20).block_height(12)
      .element_size(4).output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
}
2781
TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_28_bw_92_ies_21_oes_17) {
  // Single case: block_height 28, block_width 92, with both element strides
  // (input 21, output 17) set explicitly alongside element_size 4.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(97).output_stride(34)
      .block_width(92).block_height(28)
      .element_size(4).input_element_stride(21).output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
}
2794
TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4) {
  // Single case: block_height 4, block_width 4, input_stride 8, output_stride 8.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
}
2805
TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_1_8_bw_1_8) {
  // Sweeps block_height 1..8 and block_width 1..8 (both inclusive);
  // input_stride is 3x the width and output_stride is 7x the height.
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      auto tester = TransposeMicrokernelTester();
      tester.input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
    }
  }
}
2820
TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_8) {
  // Single case: block_height 4, block_width 8, input_stride 8, output_stride 4.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(8).output_stride(4)
      .block_width(8).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
}
2831
TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_5_8) {
  // Sweeps block_width over [5, 8) with input_stride equal to the width;
  // block_height 4, output_stride 8.
  for (size_t width = 5; width < 8; ++width) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(width).output_stride(8)
        .block_width(width).block_height(4)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
  }
}
2844
TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_8_bw_5_8) {
  // Sweeps block_width over [5, 8) with input_stride equal to the width;
  // block_height 8, output_stride 8.
  for (size_t width = 5; width < 8; ++width) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(width).output_stride(8)
        .block_width(width).block_height(8)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
  }
}
2857
TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_8_bw_4) {
  // Single case: block_height 8, block_width 4, input_stride 4, output_stride 16.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(4).output_stride(16)
      .block_width(4).block_height(8)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
}
2868
TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_5_8_bw_4) {
  // Sweeps block_height over [5, 8) with output_stride equal to the height.
  // NOTE(review): the case name says bw_4, but block_width is 7 (with
  // input_stride 21); presumably intended by the generator - confirm.
  for (size_t height = 5; height < 8; ++height) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(21).output_stride(height)
        .block_width(7).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
  }
}
2881
TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_5_8_bw_8) {
  // Sweeps block_height over [5, 8) with output_stride equal to the height;
  // block_width and input_stride are both fixed at 8.
  for (size_t height = 5; height < 8; ++height) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(8).output_stride(height)
        .block_width(8).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
  }
}
2894
TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_5_8_bw_5_8) {
  // Sweeps block_height and block_width independently over [5, 8);
  // each stride equals the corresponding block dimension.
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      auto tester = TransposeMicrokernelTester();
      tester.input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
    }
  }
}
2909
TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8) {
  // Single case: input_stride 8 exceeds block_width 4; output_stride 4
  // matches block_height 4.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(8).output_stride(4)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
}
2920
TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_os_8) {
  // Single case: output_stride 8 exceeds block_height 4; input_stride 4
  // matches block_width 4.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(4).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
}
2931
TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
  // Single case: both strides (8) exceed the block dimensions
  // (block_width 4, block_height 4).
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
}
2942
TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_68_bw_76_ies_15) {
  // Single case: block_height 68, block_width 76, with input_element_stride 15
  // set explicitly (element_size 4).
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(76).output_stride(68)
      .block_width(76).block_height(68)
      .element_size(4).input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
}
2954
TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_12_bw_20_oes_15) {
  // Single case: block_height 12, block_width 20, with output_element_stride 15
  // set explicitly (element_size 4).
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(20).output_stride(12)
      .block_width(20).block_height(12)
      .element_size(4).output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
}
2966
TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_28_bw_92_ies_21_oes_17) {
  // Single case: block_height 28, block_width 92, with both element strides
  // (input 21, output 17) set explicitly alongside element_size 4.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(97).output_stride(34)
      .block_width(92).block_height(28)
      .element_size(4).input_element_stride(21).output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
}
2979
2980 #if XNN_ARCH_ARM64
TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  // Single case: block_height 4, block_width 4, input_stride 8, output_stride 8.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
}
2992
TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps block_height 1..8 and block_width 1..8 (both inclusive);
  // input_stride is 3x the width and output_stride is 7x the height.
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      auto tester = TransposeMicrokernelTester();
      tester.input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
    }
  }
}
3008
TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  // Single case: block_height 4, block_width 8, input_stride 8, output_stride 4.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(8).output_stride(4)
      .block_width(8).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
}
3020
TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps block_width over [5, 8) with input_stride equal to the width;
  // block_height 4, output_stride 8.
  for (size_t width = 5; width < 8; ++width) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(width).output_stride(8)
        .block_width(width).block_height(4)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
  }
}
3034
TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps block_width over [5, 8) with input_stride equal to the width;
  // block_height 8, output_stride 8.
  for (size_t width = 5; width < 8; ++width) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(width).output_stride(8)
        .block_width(width).block_height(8)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
  }
}
3048
TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  // Single case: block_height 8, block_width 4, input_stride 4, output_stride 16.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(4).output_stride(16)
      .block_width(4).block_height(8)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
}
3060
TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps block_height over [5, 8) with output_stride equal to the height.
  // NOTE(review): the case name says bw_4, but block_width is 7 (with
  // input_stride 21); presumably intended by the generator - confirm.
  for (size_t height = 5; height < 8; ++height) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(21).output_stride(height)
        .block_width(7).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
  }
}
3074
TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps block_height over [5, 8) with output_stride equal to the height;
  // block_width and input_stride are both fixed at 8.
  for (size_t height = 5; height < 8; ++height) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(8).output_stride(height)
        .block_width(8).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
  }
}
3088
TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps block_height and block_width independently over [5, 8);
  // each stride equals the corresponding block dimension.
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      auto tester = TransposeMicrokernelTester();
      tester.input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
    }
  }
}
3104
TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  // Single case: input_stride 8 exceeds block_width 4; output_stride 4
  // matches block_height 4.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(8).output_stride(4)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
}
3116
TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  // Single case: output_stride 8 exceeds block_height 4; input_stride 4
  // matches block_width 4.
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(4).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
}
3128
TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  // Single case: both strides (8) exceed the block dimensions
  // (block_width 4, block_height 4).
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
}
3140
// Block of width 76 and height 68 whose strides equal the block dimensions,
// configured with input_element_stride 15.
TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 76;
  const size_t height = 68;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .element_size(4)
    .input_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
}
3153
// Block of width 20 and height 12 whose strides equal the block dimensions,
// configured with output_element_stride 15.
TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 20;
  const size_t height = 12;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .element_size(4)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
}
3166
// Block of width 92 and height 28 with input stride 97 and output stride 34,
// combined with input_element_stride 21 and output_element_stride 17.
TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 92;
  const size_t height = 28;
  const size_t in_stride = 97;
  const size_t out_stride = 34;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .element_size(4)
    .input_element_stride(21)
    .output_element_stride(17)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
}
3180 #endif // XNN_ARCH_ARM64
3181
3182
3183 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single block of width 4 and height 4, with the input and output strides
// both set to 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t tile = 4;
  const size_t stride = 8;
  TransposeMicrokernelTester()
    .input_stride(stride)
    .output_stride(stride)
    .block_width(tile)
    .block_height(tile)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
}
3195
// Covers every block height and block width from 1 through 8 inclusive. The
// input stride is three times the width and the output stride is seven times
// the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      const size_t in_stride = width * 3;
      const size_t out_stride = height * 7;
      TransposeMicrokernelTester()
        .input_stride(in_stride)
        .output_stride(out_stride)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
    }
  }
}
3211
// Block of width 8 and height 4; the input stride equals the width and the
// output stride equals the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_4_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 8;
  const size_t height = 4;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
}
3223
// Sweeps block widths 5 through 7 at block height 4. The input stride equals
// the width under test; the output stride is held at 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_4_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 4;
  const size_t out_stride = 8;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
  }
}
3237
// Sweeps block widths 5 through 7 at block height 8. The input stride equals
// the width under test; the output stride equals the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 8;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
  }
}
3251
// Block of width 4 and height 8; the input stride equals the width and the
// output stride is 16.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 4;
  const size_t height = 8;
  const size_t out_stride = 16;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
}
3263
// Sweeps block heights 5 through 7; the output stride equals the height
// under test.
// NOTE(review): the test name says bw_4, but the block width exercised here
// is 7 (with input stride 21) - confirm against the generator spec.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_5_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 7;
  const size_t in_stride = 21;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
  }
}
3277
// Sweeps block heights 5 through 7 at a fixed block width of 8. The input
// stride equals the width and the output stride equals the height under test.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_5_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 8;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
  }
}
3291
// Covers every combination of block height and block width in 5 through 7.
// The input stride equals the width and the output stride equals the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
    }
  }
}
3307
// Block of width 4 and height 4 with the input stride widened to 8; the
// output stride equals the block height.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t tile = 4;
  const size_t in_stride = 8;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(tile)
    .block_width(tile)
    .block_height(tile)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
}
3319
// Block of width 4 and height 4 with the output stride widened to 8; the
// input stride equals the block width.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t tile = 4;
  const size_t out_stride = 8;
  TransposeMicrokernelTester()
    .input_stride(tile)
    .output_stride(out_stride)
    .block_width(tile)
    .block_height(tile)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
}
3331
// Block of width 4 and height 4 with both the input and output strides set
// to 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t tile = 4;
  const size_t stride = 8;
  TransposeMicrokernelTester()
    .input_stride(stride)
    .output_stride(stride)
    .block_width(tile)
    .block_height(tile)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
}
3343
// Block of width 76 and height 68 whose strides equal the block dimensions,
// configured with input_element_stride 15.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 76;
  const size_t height = 68;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .element_size(4)
    .input_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
}
3356
// Block of width 20 and height 12 whose strides equal the block dimensions,
// configured with output_element_stride 15.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 20;
  const size_t height = 12;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .element_size(4)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
}
3369
// Block of width 92 and height 28 with input stride 97 and output stride 34,
// combined with input_element_stride 21 and output_element_stride 17.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 92;
  const size_t height = 28;
  const size_t in_stride = 97;
  const size_t out_stride = 34;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .element_size(4)
    .input_element_stride(21)
    .output_element_stride(17)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
}
3383 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3384
3385
3386 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single block of width 4 and height 4, with the input and output strides
// both set to 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t tile = 4;
  const size_t stride = 8;
  TransposeMicrokernelTester()
    .input_stride(stride)
    .output_stride(stride)
    .block_width(tile)
    .block_height(tile)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
}
3398
// Covers every block height and block width from 1 through 8 inclusive. The
// input stride is three times the width and the output stride is seven times
// the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      const size_t in_stride = width * 3;
      const size_t out_stride = height * 7;
      TransposeMicrokernelTester()
        .input_stride(in_stride)
        .output_stride(out_stride)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
    }
  }
}
3414
// Block of width 8 and height 4; the input stride equals the width and the
// output stride equals the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_4_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 8;
  const size_t height = 4;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
}
3426
// Sweeps block widths 5 through 7 at block height 4. The input stride equals
// the width under test; the output stride is held at 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_4_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 4;
  const size_t out_stride = 8;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
  }
}
3440
// Sweeps block widths 5 through 7 at block height 8. The input stride equals
// the width under test; the output stride equals the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 8;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
  }
}
3454
// Block of width 4 and height 8; the input stride equals the width and the
// output stride is 16.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 4;
  const size_t height = 8;
  const size_t out_stride = 16;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
}
3466
// Sweeps block heights 5 through 7; the output stride equals the height
// under test.
// NOTE(review): the test name says bw_4, but the block width exercised here
// is 7 (with input stride 21) - confirm against the generator spec.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_5_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 7;
  const size_t in_stride = 21;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
  }
}
3480
// Sweeps block heights 5 through 7 at a fixed block width of 8. The input
// stride equals the width and the output stride equals the height under test.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_5_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 8;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
  }
}
3494
// Covers every combination of block height and block width in 5 through 7.
// The input stride equals the width and the output stride equals the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
    }
  }
}
3510
// Block of width 4 and height 4 with the input stride widened to 8; the
// output stride equals the block height.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t tile = 4;
  const size_t in_stride = 8;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(tile)
    .block_width(tile)
    .block_height(tile)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
}
3522
// Block of width 4 and height 4 with the output stride widened to 8; the
// input stride equals the block width.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t tile = 4;
  const size_t out_stride = 8;
  TransposeMicrokernelTester()
    .input_stride(tile)
    .output_stride(out_stride)
    .block_width(tile)
    .block_height(tile)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
}
3534
// Block of width 4 and height 4 with both the input and output strides set
// to 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t tile = 4;
  const size_t stride = 8;
  TransposeMicrokernelTester()
    .input_stride(stride)
    .output_stride(stride)
    .block_width(tile)
    .block_height(tile)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
}
3546
// Block of width 76 and height 68 whose strides equal the block dimensions,
// configured with input_element_stride 15.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 76;
  const size_t height = 68;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .element_size(4)
    .input_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
}
3559
// Block of width 20 and height 12 whose strides equal the block dimensions,
// configured with output_element_stride 15.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 20;
  const size_t height = 12;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .element_size(4)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
}
3572
// Block of width 92 and height 28 with input stride 97 and output stride 34,
// combined with input_element_stride 21 and output_element_stride 17.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 92;
  const size_t height = 28;
  const size_t in_stride = 97;
  const size_t out_stride = 34;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .element_size(4)
    .input_element_stride(21)
    .output_element_stride(17)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
}
3586 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3587
3588
3589 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single block of width 4 and height 4, with the input and output strides
// both set to 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t tile = 4;
  const size_t stride = 8;
  TransposeMicrokernelTester()
    .input_stride(stride)
    .output_stride(stride)
    .block_width(tile)
    .block_height(tile)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
}
3601
// Covers every block height and block width from 1 through 8 inclusive. The
// input stride is three times the width and the output stride is seven times
// the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      const size_t in_stride = width * 3;
      const size_t out_stride = height * 7;
      TransposeMicrokernelTester()
        .input_stride(in_stride)
        .output_stride(out_stride)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
    }
  }
}
3617
// Block of width 8 and height 4; the input stride equals the width and the
// output stride equals the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_4_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 8;
  const size_t height = 4;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
}
3629
// Sweeps block widths 5 through 7 at block height 4. The input stride equals
// the width under test; the output stride is held at 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_4_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 4;
  const size_t out_stride = 8;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
  }
}
3643
// Sweeps block widths 5 through 7 at block height 8. The input stride equals
// the width under test; the output stride equals the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 8;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
  }
}
3657
// Block of width 4 and height 8; the input stride equals the width and the
// output stride is 16.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 4;
  const size_t height = 8;
  const size_t out_stride = 16;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
}
3669
// Sweeps block heights 5 through 7; the output stride equals the height
// under test.
// NOTE(review): the test name says bw_4, but the block width exercised here
// is 7 (with input stride 21) - confirm against the generator spec.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_5_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 7;
  const size_t in_stride = 21;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
  }
}
3683
// Sweeps block heights 5 through 7 at a fixed block width of 8. The input
// stride equals the width and the output stride equals the height under test.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_5_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 8;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
  }
}
3697
// Covers every combination of block height and block width in 5 through 7.
// The input stride equals the width and the output stride equals the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
    }
  }
}
3713
// Block of width 4 and height 4 with the input stride widened to 8; the
// output stride equals the block height.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t tile = 4;
  const size_t in_stride = 8;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(tile)
    .block_width(tile)
    .block_height(tile)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
}
3725
// Block of width 4 and height 4 with the output stride widened to 8; the
// input stride equals the block width.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t tile = 4;
  const size_t out_stride = 8;
  TransposeMicrokernelTester()
    .input_stride(tile)
    .output_stride(out_stride)
    .block_width(tile)
    .block_height(tile)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
}
3737
// Block of width 4 and height 4 with both the input and output strides set
// to 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t tile = 4;
  const size_t stride = 8;
  TransposeMicrokernelTester()
    .input_stride(stride)
    .output_stride(stride)
    .block_width(tile)
    .block_height(tile)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
}
3749
// Block of width 76 and height 68 whose strides equal the block dimensions,
// configured with input_element_stride 15.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 76;
  const size_t height = 68;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .element_size(4)
    .input_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
}
3762
// Block of width 20 and height 12 whose strides equal the block dimensions,
// configured with output_element_stride 15.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 20;
  const size_t height = 12;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .element_size(4)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
}
3775
// Block of width 92 and height 28 with input stride 97 and output stride 34,
// combined with input_element_stride 21 and output_element_stride 17.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 92;
  const size_t height = 28;
  const size_t in_stride = 97;
  const size_t out_stride = 34;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .element_size(4)
    .input_element_stride(21)
    .output_element_stride(17)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
}
3789 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3790
3791
3792 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single block of width 4 and height 4, with the input and output strides
// both set to 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t tile = 4;
  const size_t stride = 8;
  TransposeMicrokernelTester()
    .input_stride(stride)
    .output_stride(stride)
    .block_width(tile)
    .block_height(tile)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
}
3804
// Covers every block height and block width from 1 through 8 inclusive. The
// input stride is three times the width and the output stride is seven times
// the height.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      const size_t in_stride = width * 3;
      const size_t out_stride = height * 7;
      TransposeMicrokernelTester()
        .input_stride(in_stride)
        .output_stride(out_stride)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
    }
  }
}
3820
// Block of width 8 and height 4; the input stride equals the width and the
// output stride equals the height.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_4_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 8;
  const size_t height = 4;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
}
3832
// Sweeps block widths 5 through 7 at block height 4. The input stride equals
// the width under test; the output stride is held at 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_4_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 4;
  const size_t out_stride = 8;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
  }
}
3846
// Sweeps block widths 5 through 7 at block height 8. The input stride equals
// the width under test; the output stride equals the height.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 8;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
  }
}
3860
// Block of width 4 and height 8; the input stride equals the width and the
// output stride is 16.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 4;
  const size_t height = 8;
  const size_t out_stride = 16;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
}
3872
// Sweeps block heights 5 through 7; the output stride equals the height
// under test.
// NOTE(review): the test name says bw_4, but the block width exercised here
// is 7 (with input stride 21) - confirm against the generator spec.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_5_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 7;
  const size_t in_stride = 21;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
  }
}
3886
// Sweeps block heights 5 through 7 at a fixed block width of 8. The input
// stride equals the width and the output stride equals the height under test.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_5_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 8;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
  }
}
3900
// Covers every combination of block height and block width in 5 through 7.
// The input stride equals the width and the output stride equals the height.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
    }
  }
}
3916
// Block of width 4 and height 4 with the input stride widened to 8; the
// output stride equals the block height.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t tile = 4;
  const size_t in_stride = 8;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(tile)
    .block_width(tile)
    .block_height(tile)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
}
3928
// Block of width 4 and height 4 with the output stride widened to 8; the
// input stride equals the block width.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t tile = 4;
  const size_t out_stride = 8;
  TransposeMicrokernelTester()
    .input_stride(tile)
    .output_stride(out_stride)
    .block_width(tile)
    .block_height(tile)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
}
3940
// Block of width 4 and height 4 with both the input and output strides set
// to 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t tile = 4;
  const size_t stride = 8;
  TransposeMicrokernelTester()
    .input_stride(stride)
    .output_stride(stride)
    .block_width(tile)
    .block_height(tile)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
}
3952
// Block of width 76 and height 68 whose strides equal the block dimensions,
// configured with input_element_stride 15.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 76;
  const size_t height = 68;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .element_size(4)
    .input_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
}
3965
// Block of width 20 and height 12 whose strides equal the block dimensions,
// configured with output_element_stride 15.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 20;
  const size_t height = 12;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .element_size(4)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
}
3978
// Block of width 92 and height 28 with input stride 97 and output stride 34,
// combined with input_element_stride 21 and output_element_stride 17.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 92;
  const size_t height = 28;
  const size_t in_stride = 97;
  const size_t out_stride = 34;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .element_size(4)
    .input_element_stride(21)
    .output_element_stride(17)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
}
3992 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3993
3994
3995 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single block of width 4 and height 4, with the input and output strides
// both set to 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t tile = 4;
  const size_t stride = 8;
  TransposeMicrokernelTester()
    .input_stride(stride)
    .output_stride(stride)
    .block_width(tile)
    .block_height(tile)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2);
}
4007
// Covers every block height and block width from 1 through 8 inclusive. The
// input stride is three times the width and the output stride is seven times
// the height.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      const size_t in_stride = width * 3;
      const size_t out_stride = height * 7;
      TransposeMicrokernelTester()
        .input_stride(in_stride)
        .output_stride(out_stride)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2);
    }
  }
}
4023
// Block of width 8 and height 4; the input stride equals the width and the
// output stride equals the height.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_4_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 8;
  const size_t height = 4;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2);
}
4035
// Sweeps block widths 5 through 7 at block height 4. The input stride equals
// the width under test; the output stride is held at 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_4_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 4;
  const size_t out_stride = 8;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2);
  }
}
4049
// Sweeps block widths 5 through 7 at block height 8. The input stride equals
// the width under test; the output stride equals the height.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 8;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2);
  }
}
4063
// Block of width 4 and height 8; the input stride equals the width and the
// output stride is 16.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 4;
  const size_t height = 8;
  const size_t out_stride = 16;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2);
}
4075
// Sweeps block heights 5 through 7; the output stride equals the height
// under test.
// NOTE(review): the test name says bw_4, but the block width exercised here
// is 7 (with input stride 21) - confirm against the generator spec.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_5_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 7;
  const size_t in_stride = 21;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2);
  }
}
4089
// Sweeps block heights 5 through 7 at a fixed block width of 8. The input
// stride equals the width and the output stride equals the height under test.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_5_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 8;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2);
  }
}
4103
// Covers every combination of block height and block width in 5 through 7.
// The input stride equals the width and the output stride equals the height.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2);
    }
  }
}
4119
// Block of width 4 and height 4 with the input stride widened to 8; the
// output stride equals the block height.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t tile = 4;
  const size_t in_stride = 8;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(tile)
    .block_width(tile)
    .block_height(tile)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2);
}
4131
// Block of width 4 and height 4 with the output stride widened to 8; the
// input stride equals the block width.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t tile = 4;
  const size_t out_stride = 8;
  TransposeMicrokernelTester()
    .input_stride(tile)
    .output_stride(out_stride)
    .block_width(tile)
    .block_height(tile)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2);
}
4143
// Block of width 4 and height 4 with both the input and output strides set
// to 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t tile = 4;
  const size_t stride = 8;
  TransposeMicrokernelTester()
    .input_stride(stride)
    .output_stride(stride)
    .block_width(tile)
    .block_height(tile)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2);
}
4155
// 68x76 (height x width) block with input_element_stride 15; the strides
// equal the block dimensions.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_X86_SSE2;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2;
  TransposeMicrokernelTester()
      .input_stride(76)
      .output_stride(68)
      .block_width(76)
      .block_height(68)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(kernel);
}
4168
// 12x20 (height x width) block with output_element_stride 15; the strides
// equal the block dimensions.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_X86_SSE2;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2;
  TransposeMicrokernelTester()
      .input_stride(20)
      .output_stride(12)
      .block_width(20)
      .block_height(12)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(kernel);
}
4181
// 28x92 (height x width) block with input_stride 97, output_stride 34,
// input_element_stride 21 and output_element_stride 17.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_X86_SSE2;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2;
  TransposeMicrokernelTester()
      .input_stride(97)
      .output_stride(34)
      .block_width(92)
      .block_height(28)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(kernel);
}
4195 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4196
4197
4198 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single 4x4 block with input_stride and output_stride both set to 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(kernel);
}
4210
// Sweeps every block height and width from 1 through 8 (inclusive), with
// input_stride = 3 * width and output_stride = 7 * height.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_X86_SSE2;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2;
  for (size_t height = 1; height <= 8; height++) {
    for (size_t width = 1; width <= 8; width++) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(kernel);
    }
  }
}
4226
// Block height 4, block width 8; input_stride 8, output_stride 4.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_4_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(8)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(kernel);
}
4238
// Block height 4 with block widths 5..7 (the loop bound 8 is exclusive);
// input_stride tracks the width.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_4_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2;
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(4)
        .element_size(4)
        .iterations(1)
        .Test(kernel);
  }
}
4252
// Block height 8 with block widths 5..7 (the loop bound 8 is exclusive);
// input_stride tracks the width.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2;
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(8)
        .element_size(4)
        .iterations(1)
        .Test(kernel);
  }
}
4266
// Block height 8, block width 4; input_stride 4, output_stride 16.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(16)
      .block_width(4)
      .block_height(8)
      .element_size(4)
      .iterations(1)
      .Test(kernel);
}
4278
// Block heights 5..7 (the loop bound 8 is exclusive); output_stride tracks
// the height.
// NOTE(review): the test name says bw_4, but block_width is 7 and
// input_stride is 21 -- confirm against the generator template.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_5_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2;
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester()
        .input_stride(21)
        .output_stride(height)
        .block_width(7)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(kernel);
  }
}
4292
// Block heights 5..7 (the loop bound 8 is exclusive) at block width 8;
// output_stride tracks the height.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_5_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2;
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester()
        .input_stride(8)
        .output_stride(height)
        .block_width(8)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(kernel);
  }
}
4306
// Block heights and widths 5..7 (both loop bounds are exclusive); each stride
// equals the matching block dimension.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2;
  for (size_t height = 5; height < 8; height++) {
    for (size_t width = 5; width < 8; width++) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(kernel);
    }
  }
}
4322
// 4x4 block where only input_stride (8) exceeds the block size;
// output_stride is 4.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_X86_SSE2;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(kernel);
}
4334
// 4x4 block where only output_stride (8) exceeds the block size;
// input_stride is 4.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_X86_SSE2;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(kernel);
}
4346
// 4x4 block with both input_stride and output_stride set to 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_X86_SSE2;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(kernel);
}
4358
// 68x76 (height x width) block with input_element_stride 15; the strides
// equal the block dimensions.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_X86_SSE2;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2;
  TransposeMicrokernelTester()
      .input_stride(76)
      .output_stride(68)
      .block_width(76)
      .block_height(68)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(kernel);
}
4371
// 12x20 (height x width) block with output_element_stride 15; the strides
// equal the block dimensions.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_X86_SSE2;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2;
  TransposeMicrokernelTester()
      .input_stride(20)
      .output_stride(12)
      .block_width(20)
      .block_height(12)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(kernel);
}
4384
// 28x92 (height x width) block with input_stride 97, output_stride 34,
// input_element_stride 21 and output_element_stride 17.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_X86_SSE2;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2;
  TransposeMicrokernelTester()
      .input_stride(97)
      .output_stride(34)
      .block_width(92)
      .block_height(28)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(kernel);
}
4398 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4399
4400
4401 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single 4x4 block with input_stride and output_stride both set to 8.
TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_4_bw_4) {
  TEST_REQUIRES_X86_SSE;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_sse;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(kernel);
}
4413
// Sweeps every block height and width from 1 through 8 (inclusive), with
// input_stride = 3 * width and output_stride = 7 * height.
TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_X86_SSE;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_sse;
  for (size_t height = 1; height <= 8; height++) {
    for (size_t width = 1; width <= 8; width++) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(kernel);
    }
  }
}
4429
// Block height 4, block width 8; input_stride 8, output_stride 4.
TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_4_bw_8) {
  TEST_REQUIRES_X86_SSE;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_sse;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(8)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(kernel);
}
4441
// Block height 4 with block widths 5..7 (the loop bound 8 is exclusive);
// input_stride tracks the width.
TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_4_bw_5_8) {
  TEST_REQUIRES_X86_SSE;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_sse;
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(4)
        .element_size(4)
        .iterations(1)
        .Test(kernel);
  }
}
4455
// Block height 8 with block widths 5..7 (the loop bound 8 is exclusive);
// input_stride tracks the width.
TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_sse;
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(8)
        .element_size(4)
        .iterations(1)
        .Test(kernel);
  }
}
4469
// Block height 8, block width 4; input_stride 4, output_stride 16.
TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_8_bw_4) {
  TEST_REQUIRES_X86_SSE;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_sse;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(16)
      .block_width(4)
      .block_height(8)
      .element_size(4)
      .iterations(1)
      .Test(kernel);
}
4481
// Block heights 5..7 (the loop bound 8 is exclusive); output_stride tracks
// the height.
// NOTE(review): the test name says bw_4, but block_width is 7 and
// input_stride is 21 -- confirm against the generator template.
TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_5_8_bw_4) {
  TEST_REQUIRES_X86_SSE;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_sse;
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester()
        .input_stride(21)
        .output_stride(height)
        .block_width(7)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(kernel);
  }
}
4495
// Block heights 5..7 (the loop bound 8 is exclusive) at block width 8;
// output_stride tracks the height.
TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_5_8_bw_8) {
  TEST_REQUIRES_X86_SSE;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_sse;
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester()
        .input_stride(8)
        .output_stride(height)
        .block_width(8)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(kernel);
  }
}
4509
// Block heights and widths 5..7 (both loop bounds are exclusive); each stride
// equals the matching block dimension.
TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_sse;
  for (size_t height = 5; height < 8; height++) {
    for (size_t width = 5; width < 8; width++) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(kernel);
    }
  }
}
4525
// 4x4 block where only input_stride (8) exceeds the block size;
// output_stride is 4.
TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_X86_SSE;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_sse;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(kernel);
}
4537
// 4x4 block where only output_stride (8) exceeds the block size;
// input_stride is 4.
TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_X86_SSE;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_sse;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(kernel);
}
4549
// 4x4 block with both input_stride and output_stride set to 8.
TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_X86_SSE;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_sse;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(kernel);
}
4561
// 68x76 (height x width) block with input_element_stride 15; the strides
// equal the block dimensions.
TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_X86_SSE;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_sse;
  TransposeMicrokernelTester()
      .input_stride(76)
      .output_stride(68)
      .block_width(76)
      .block_height(68)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(kernel);
}
4574
// 12x20 (height x width) block with output_element_stride 15; the strides
// equal the block dimensions.
TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_X86_SSE;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_sse;
  TransposeMicrokernelTester()
      .input_stride(20)
      .output_stride(12)
      .block_width(20)
      .block_height(12)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(kernel);
}
4587
// 28x92 (height x width) block with input_stride 97, output_stride 34,
// input_element_stride 21 and output_element_stride 17.
TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_X86_SSE;
  const auto kernel = xnn_x32_transposec_ukernel__4x4_sse;
  TransposeMicrokernelTester()
      .input_stride(97)
      .output_stride(34)
      .block_width(92)
      .block_height(28)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(kernel);
}
4601 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4602
4603
4604 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single 4x4 block with input_stride and output_stride both set to 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_4_bw_4) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(kernel);
}
4615
// Sweeps every block height and width from 1 through 8 (inclusive), with
// input_stride = 3 * width and output_stride = 7 * height.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_1_8_bw_1_8) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd;
  for (size_t height = 1; height <= 8; height++) {
    for (size_t width = 1; width <= 8; width++) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(kernel);
    }
  }
}
4630
// Block height 4, block width 8; input_stride 8, output_stride 4.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_4_bw_8) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(8)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(kernel);
}
4641
// Block height 4 with block widths 5..7 (the loop bound 8 is exclusive);
// input_stride tracks the width.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_4_bw_5_8) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd;
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(4)
        .element_size(4)
        .iterations(1)
        .Test(kernel);
  }
}
4654
// Block height 8 with block widths 5..7 (the loop bound 8 is exclusive);
// input_stride tracks the width.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_8_bw_5_8) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd;
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(8)
        .element_size(4)
        .iterations(1)
        .Test(kernel);
  }
}
4667
// Block height 8, block width 4; input_stride 4, output_stride 16.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_8_bw_4) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(16)
      .block_width(4)
      .block_height(8)
      .element_size(4)
      .iterations(1)
      .Test(kernel);
}
4678
// Block heights 5..7 (the loop bound 8 is exclusive); output_stride tracks
// the height.
// NOTE(review): the test name says bw_4, but block_width is 7 and
// input_stride is 21 -- confirm against the generator template.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_5_8_bw_4) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd;
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester()
        .input_stride(21)
        .output_stride(height)
        .block_width(7)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(kernel);
  }
}
4691
// Block heights 5..7 (the loop bound 8 is exclusive) at block width 8;
// output_stride tracks the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_5_8_bw_8) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd;
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester()
        .input_stride(8)
        .output_stride(height)
        .block_width(8)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(kernel);
  }
}
4704
// Block heights and widths 5..7 (both loop bounds are exclusive); each stride
// equals the matching block dimension.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_5_8_bw_5_8) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd;
  for (size_t height = 5; height < 8; height++) {
    for (size_t width = 5; width < 8; width++) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(kernel);
    }
  }
}
4719
// 4x4 block where only input_stride (8) exceeds the block size;
// output_stride is 4.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_4_bw_4_is_8) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(kernel);
}
4730
// 4x4 block where only output_stride (8) exceeds the block size;
// input_stride is 4.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_4_bw_4_os_8) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(kernel);
}
4741
// 4x4 block with both input_stride and output_stride set to 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_4_bw_4_is_8_os_8) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(kernel);
}
4752
// 68x76 (height x width) block with input_element_stride 15; the strides
// equal the block dimensions.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_68_bw_76_ies_15) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(76)
      .output_stride(68)
      .block_width(76)
      .block_height(68)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(kernel);
}
4764
// 12x20 (height x width) block with output_element_stride 15; the strides
// equal the block dimensions.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_12_bw_20_oes_15) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(20)
      .output_stride(12)
      .block_width(20)
      .block_height(12)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(kernel);
}
4776
// 28x92 (height x width) block with input_stride 97, output_stride 34,
// input_element_stride 21 and output_element_stride 17.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_28_bw_92_ies_21_oes_17) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(97)
      .output_stride(34)
      .block_width(92)
      .block_height(28)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(kernel);
}
4789 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
4790
4791
4792 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single 4x4 block with input_stride and output_stride both set to 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_4_bw_4) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(kernel);
}
4803
// Sweeps every block height and width from 1 through 8 (inclusive), with
// input_stride = 3 * width and output_stride = 7 * height.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_1_8_bw_1_8) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd;
  for (size_t height = 1; height <= 8; height++) {
    for (size_t width = 1; width <= 8; width++) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(kernel);
    }
  }
}
4818
// Block height 4, block width 8; input_stride 8, output_stride 4.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_4_bw_8) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(8)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(kernel);
}
4829
// Block height 4 with block widths 5..7 (the loop bound 8 is exclusive);
// input_stride tracks the width.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_4_bw_5_8) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd;
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(4)
        .element_size(4)
        .iterations(1)
        .Test(kernel);
  }
}
4842
// Block height 8 with block widths 5..7 (the loop bound 8 is exclusive);
// input_stride tracks the width.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_8_bw_5_8) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd;
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(8)
        .element_size(4)
        .iterations(1)
        .Test(kernel);
  }
}
4855
// Block height 8, block width 4; input_stride 4, output_stride 16.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_8_bw_4) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(16)
      .block_width(4)
      .block_height(8)
      .element_size(4)
      .iterations(1)
      .Test(kernel);
}
4866
// Block heights 5..7 (the loop bound 8 is exclusive); output_stride tracks
// the height.
// NOTE(review): the test name says bw_4, but block_width is 7 and
// input_stride is 21 -- confirm against the generator template.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_5_8_bw_4) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd;
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester()
        .input_stride(21)
        .output_stride(height)
        .block_width(7)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(kernel);
  }
}
4879
// Block heights 5..7 (the loop bound 8 is exclusive) at block width 8;
// output_stride tracks the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_5_8_bw_8) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd;
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester()
        .input_stride(8)
        .output_stride(height)
        .block_width(8)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(kernel);
  }
}
4892
// Block heights and widths 5..7 (both loop bounds are exclusive); each stride
// equals the matching block dimension.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_5_8_bw_5_8) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd;
  for (size_t height = 5; height < 8; height++) {
    for (size_t width = 5; width < 8; width++) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(kernel);
    }
  }
}
4907
// 4x4 block where only input_stride (8) exceeds the block size;
// output_stride is 4.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_4_bw_4_is_8) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(kernel);
}
4918
// 4x4 block where only output_stride (8) exceeds the block size;
// input_stride is 4.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_4_bw_4_os_8) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(kernel);
}
4929
// 4x4 block with both input_stride and output_stride set to 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_4_bw_4_is_8_os_8) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(kernel);
}
4940
// 68x76 (height x width) block with input_element_stride 15; the strides
// equal the block dimensions.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_68_bw_76_ies_15) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(76)
      .output_stride(68)
      .block_width(76)
      .block_height(68)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(kernel);
}
4952
// 12x20 (height x width) block with output_element_stride 15; the strides
// equal the block dimensions.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_12_bw_20_oes_15) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(20)
      .output_stride(12)
      .block_width(20)
      .block_height(12)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(kernel);
}
4964
// 28x92 (height x width) block with input_stride 97, output_stride 34,
// input_element_stride 21 and output_element_stride 17.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_28_bw_92_ies_21_oes_17) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(97)
      .output_stride(34)
      .block_width(92)
      .block_height(28)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(kernel);
}
4977 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
4978
4979
4980 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single 4x4 block with input_stride and output_stride both set to 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_4_bw_4) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(kernel);
}
4991
// Sweeps every block height and width from 1 through 8 (inclusive), with
// input_stride = 3 * width and output_stride = 7 * height.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_1_8_bw_1_8) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd;
  for (size_t height = 1; height <= 8; height++) {
    for (size_t width = 1; width <= 8; width++) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(kernel);
    }
  }
}
5006
// Block height 4, block width 8; input_stride 8, output_stride 4.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_4_bw_8) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(8)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(kernel);
}
5017
// Block height 4 with block widths 5..7 (the loop bound 8 is exclusive);
// input_stride tracks the width.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_4_bw_5_8) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd;
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(4)
        .element_size(4)
        .iterations(1)
        .Test(kernel);
  }
}
5030
// Block height 8 with block widths 5..7 (the loop bound 8 is exclusive);
// input_stride tracks the width.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_8_bw_5_8) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd;
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(8)
        .element_size(4)
        .iterations(1)
        .Test(kernel);
  }
}
5043
// Block height 8, block width 4; input_stride 4, output_stride 16.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_8_bw_4) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(16)
      .block_width(4)
      .block_height(8)
      .element_size(4)
      .iterations(1)
      .Test(kernel);
}
5054
// Block heights 5..7 (the loop bound 8 is exclusive); output_stride tracks
// the height.
// NOTE(review): the test name says bw_4, but block_width is 7 and
// input_stride is 21 -- confirm against the generator template.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_5_8_bw_4) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd;
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester()
        .input_stride(21)
        .output_stride(height)
        .block_width(7)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(kernel);
  }
}
5067
// Block heights 5..7 (the loop bound 8 is exclusive) at block width 8;
// output_stride tracks the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_5_8_bw_8) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd;
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester()
        .input_stride(8)
        .output_stride(height)
        .block_width(8)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(kernel);
  }
}
5080
// Block heights and widths 5..7 (both loop bounds are exclusive); each stride
// equals the matching block dimension.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_5_8_bw_5_8) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd;
  for (size_t height = 5; height < 8; height++) {
    for (size_t width = 5; width < 8; width++) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(kernel);
    }
  }
}
5095
// 4x4 block where only input_stride (8) exceeds the block size;
// output_stride is 4.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_4_bw_4_is_8) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(kernel);
}
5106
// 4x4 block where only output_stride (8) exceeds the block size;
// input_stride is 4.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_4_bw_4_os_8) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(kernel);
}
5117
// 4x4 block with both input_stride and output_stride set to 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_4_bw_4_is_8_os_8) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(kernel);
}
5128
// 68x76 (height x width) block with input_element_stride 15; the strides
// equal the block dimensions.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_68_bw_76_ies_15) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(76)
      .output_stride(68)
      .block_width(76)
      .block_height(68)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(kernel);
}
5140
// 12x20 (height x width) block with output_element_stride 15; the strides
// equal the block dimensions.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_12_bw_20_oes_15) {
  const auto kernel = xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(20)
      .output_stride(12)
      .block_width(20)
      .block_height(12)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(kernel);
}
5152
// 92-wide, 28-high block; input_stride 97 and output_stride 34 both exceed the
// block dimensions, and both element strides (21 in, 17 out) are set.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_28_bw_92_ies_21_oes_17) {
  TransposeMicrokernelTester()
      .input_stride(97).output_stride(34)
      .block_width(92).block_height(28)
      .element_size(4)
      .input_element_stride(21).output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd);
}
5165 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5166
5167
5168 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: 4-wide, 4-high block; input_stride 8, output_stride 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_4_bw_4) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd);
}
5179
// Sweeps every block height and width from 1 through 8 (inclusive);
// input_stride is width * 3 and output_stride is height * 7.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_1_8_bw_1_8) {
  for (size_t height = 1; height <= 8; height++) {
    for (size_t width = 1; width <= 8; width++) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd);
    }
  }
}
5194
// Single configuration: 8-wide, 4-high block; input_stride 8, output_stride 4.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_4_bw_8) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(8).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd);
}
5205
// Block height 4 with block widths 5, 6 and 7 (the loop bound 8 is exclusive);
// input_stride equals the width, output_stride is 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_4_bw_5_8) {
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(8)
        .block_width(width).block_height(4)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd);
  }
}
5218
// Block height 8 with block widths 5, 6 and 7 (the loop bound 8 is exclusive);
// input_stride equals the width, output_stride is 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_8_bw_5_8) {
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(8)
        .block_width(width).block_height(8)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd);
  }
}
5231
// Single configuration: 4-wide, 8-high block; input_stride 4, output_stride 16.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_8_bw_4) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(16)
      .block_width(4).block_height(8)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd);
}
5242
// Block heights 5, 6 and 7 (the loop bound 8 is exclusive); output_stride
// equals the height. Note: despite the `bw_4` suffix in the test name, the
// block width used here is 7, with input_stride 21.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_5_8_bw_4) {
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester()
        .input_stride(21).output_stride(height)
        .block_width(7).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd);
  }
}
5255
// Block width 8 with block heights 5, 6 and 7 (the loop bound 8 is exclusive);
// input_stride is 8, output_stride equals the height.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_5_8_bw_8) {
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester()
        .input_stride(8).output_stride(height)
        .block_width(8).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd);
  }
}
5268
// Sweeps block heights and widths over 5, 6 and 7 (both loop bounds exclude 8);
// input_stride equals the width and output_stride equals the height.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_5_8_bw_5_8) {
  for (size_t height = 5; height < 8; height++) {
    for (size_t width = 5; width < 8; width++) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd);
    }
  }
}
5283
// Single configuration: 4-wide, 4-high block; input_stride 8, output_stride 4.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_4_bw_4_is_8) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd);
}
5294
// Single configuration: 4-wide, 4-high block; input_stride 4, output_stride 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_4_bw_4_os_8) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd);
}
5305
// Single configuration: 4-wide, 4-high block; input_stride 8, output_stride 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_4_bw_4_is_8_os_8) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd);
}
5316
// 76-wide, 68-high block with input_element_stride 15 (element_size stays 4).
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_68_bw_76_ies_15) {
  TransposeMicrokernelTester()
      .input_stride(76).output_stride(68)
      .block_width(76).block_height(68)
      .element_size(4).input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd);
}
5328
// 20-wide, 12-high block with output_element_stride 15 (element_size stays 4).
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_12_bw_20_oes_15) {
  TransposeMicrokernelTester()
      .input_stride(20).output_stride(12)
      .block_width(20).block_height(12)
      .element_size(4).output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd);
}
5340
// 92-wide, 28-high block; input_stride 97 and output_stride 34 both exceed the
// block dimensions, and both element strides (21 in, 17 out) are set.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_28_bw_92_ies_21_oes_17) {
  TransposeMicrokernelTester()
      .input_stride(97).output_stride(34)
      .block_width(92).block_height(28)
      .element_size(4)
      .input_element_stride(21).output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd);
}
5353 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5354
5355
5356 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: 4-wide, 4-high block; input_stride 8, output_stride 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_4_bw_4) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd);
}
5367
// Sweeps every block height and width from 1 through 8 (inclusive);
// input_stride is width * 3 and output_stride is height * 7.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_1_8_bw_1_8) {
  for (size_t height = 1; height <= 8; height++) {
    for (size_t width = 1; width <= 8; width++) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd);
    }
  }
}
5382
// Single configuration: 8-wide, 4-high block; input_stride 8, output_stride 4.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_4_bw_8) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(8).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd);
}
5393
// Block height 4 with block widths 5, 6 and 7 (the loop bound 8 is exclusive);
// input_stride equals the width, output_stride is 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_4_bw_5_8) {
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(8)
        .block_width(width).block_height(4)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd);
  }
}
5406
// Block height 8 with block widths 5, 6 and 7 (the loop bound 8 is exclusive);
// input_stride equals the width, output_stride is 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_8_bw_5_8) {
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(8)
        .block_width(width).block_height(8)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd);
  }
}
5419
// Single configuration: 4-wide, 8-high block; input_stride 4, output_stride 16.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_8_bw_4) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(16)
      .block_width(4).block_height(8)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd);
}
5430
// Block heights 5, 6 and 7 (the loop bound 8 is exclusive); output_stride
// equals the height. Note: despite the `bw_4` suffix in the test name, the
// block width used here is 7, with input_stride 21.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_5_8_bw_4) {
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester()
        .input_stride(21).output_stride(height)
        .block_width(7).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd);
  }
}
5443
// Block width 8 with block heights 5, 6 and 7 (the loop bound 8 is exclusive);
// input_stride is 8, output_stride equals the height.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_5_8_bw_8) {
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester()
        .input_stride(8).output_stride(height)
        .block_width(8).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd);
  }
}
5456
// Sweeps block heights and widths over 5, 6 and 7 (both loop bounds exclude 8);
// input_stride equals the width and output_stride equals the height.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_5_8_bw_5_8) {
  for (size_t height = 5; height < 8; height++) {
    for (size_t width = 5; width < 8; width++) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd);
    }
  }
}
5471
// Single configuration: 4-wide, 4-high block; input_stride 8, output_stride 4.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_4_bw_4_is_8) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd);
}
5482
// Single configuration: 4-wide, 4-high block; input_stride 4, output_stride 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_4_bw_4_os_8) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd);
}
5493
// Single configuration: 4-wide, 4-high block; input_stride 8, output_stride 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_4_bw_4_is_8_os_8) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd);
}
5504
// 76-wide, 68-high block with input_element_stride 15 (element_size stays 4).
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_68_bw_76_ies_15) {
  TransposeMicrokernelTester()
      .input_stride(76).output_stride(68)
      .block_width(76).block_height(68)
      .element_size(4).input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd);
}
5516
// 20-wide, 12-high block with output_element_stride 15 (element_size stays 4).
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_12_bw_20_oes_15) {
  TransposeMicrokernelTester()
      .input_stride(20).output_stride(12)
      .block_width(20).block_height(12)
      .element_size(4).output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd);
}
5528
// 92-wide, 28-high block; input_stride 97 and output_stride 34 both exceed the
// block dimensions, and both element strides (21 in, 17 out) are set.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_28_bw_92_ies_21_oes_17) {
  TransposeMicrokernelTester()
      .input_stride(97).output_stride(34)
      .block_width(92).block_height(28)
      .element_size(4)
      .input_element_stride(21).output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd);
}
5541 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5542
5543
5544 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: 4-wide, 4-high block; input_stride 8, output_stride 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_4_bw_4) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
}
5555
// Sweeps every block height and width from 1 through 8 (inclusive);
// input_stride is width * 3 and output_stride is height * 7.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_1_8_bw_1_8) {
  for (size_t height = 1; height <= 8; height++) {
    for (size_t width = 1; width <= 8; width++) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
    }
  }
}
5570
// Single configuration: 8-wide, 4-high block; input_stride 8, output_stride 4.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_4_bw_8) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(8).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
}
5581
// Block height 4 with block widths 5, 6 and 7 (the loop bound 8 is exclusive);
// input_stride equals the width, output_stride is 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_4_bw_5_8) {
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(8)
        .block_width(width).block_height(4)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
  }
}
5594
// Block height 8 with block widths 5, 6 and 7 (the loop bound 8 is exclusive);
// input_stride equals the width, output_stride is 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_8_bw_5_8) {
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(8)
        .block_width(width).block_height(8)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
  }
}
5607
// Single configuration: 4-wide, 8-high block; input_stride 4, output_stride 16.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_8_bw_4) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(16)
      .block_width(4).block_height(8)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
}
5618
// Block heights 5, 6 and 7 (the loop bound 8 is exclusive); output_stride
// equals the height. Note: despite the `bw_4` suffix in the test name, the
// block width used here is 7, with input_stride 21.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_5_8_bw_4) {
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester()
        .input_stride(21).output_stride(height)
        .block_width(7).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
  }
}
5631
// Block width 8 with block heights 5, 6 and 7 (the loop bound 8 is exclusive);
// input_stride is 8, output_stride equals the height.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_5_8_bw_8) {
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester()
        .input_stride(8).output_stride(height)
        .block_width(8).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
  }
}
5644
// Sweeps block heights and widths over 5, 6 and 7 (both loop bounds exclude 8);
// input_stride equals the width and output_stride equals the height.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_5_8_bw_5_8) {
  for (size_t height = 5; height < 8; height++) {
    for (size_t width = 5; width < 8; width++) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
    }
  }
}
5659
// Single configuration: 4-wide, 4-high block; input_stride 8, output_stride 4.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_4_bw_4_is_8) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
}
5670
// Single configuration: 4-wide, 4-high block; input_stride 4, output_stride 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_4_bw_4_os_8) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
}
5681
// Single configuration: 4-wide, 4-high block; input_stride 8, output_stride 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_4_bw_4_is_8_os_8) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
}
5692
// 76-wide, 68-high block with input_element_stride 15 (element_size stays 4).
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_68_bw_76_ies_15) {
  TransposeMicrokernelTester()
      .input_stride(76).output_stride(68)
      .block_width(76).block_height(68)
      .element_size(4).input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
}
5704
// 20-wide, 12-high block with output_element_stride 15 (element_size stays 4).
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_12_bw_20_oes_15) {
  TransposeMicrokernelTester()
      .input_stride(20).output_stride(12)
      .block_width(20).block_height(12)
      .element_size(4).output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
}
5716
// 92-wide, 28-high block; input_stride 97 and output_stride 34 both exceed the
// block dimensions, and both element strides (21 in, 17 out) are set.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_28_bw_92_ies_21_oes_17) {
  TransposeMicrokernelTester()
      .input_stride(97).output_stride(34)
      .block_width(92).block_height(28)
      .element_size(4)
      .input_element_stride(21).output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
}
5729 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5730
5731
5732 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single configuration: 2-wide, 2-high block; input_stride 4, output_stride 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_2_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
}
5744
// Sweeps every block height and width from 1 through 4 (inclusive);
// input_stride is width * 3 and output_stride is height * 7.
TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_1_4_bw_1_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 4; height++) {
    for (size_t width = 1; width <= 4; width++) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
    }
  }
}
5760
// Single configuration: 4-wide, 2-high block; input_stride 4, output_stride 2.
TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_2_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(2)
      .block_width(4).block_height(2)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
}
5772
// Block height 2; the loop bound 4 is exclusive, so only block width 3 runs.
// input_stride equals the width, output_stride is 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_2_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 3; width < 4; width++) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(4)
        .block_width(width).block_height(2)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
  }
}
5786
// Block height 4; the loop bound 4 is exclusive, so only block width 3 runs.
// input_stride equals the width, output_stride is 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 3; width < 4; width++) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(4)
        .block_width(width).block_height(4)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
  }
}
5800
// Single configuration: 2-wide, 4-high block; input_stride 2, output_stride 10.
TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(10)
      .block_width(2).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
}
5812
// The loop bound 4 is exclusive, so only block height 3 runs; output_stride
// equals the height. Note: despite the `bw_2` suffix in the test name, the
// block width used here is 5, with input_stride 19.
TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_3_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; height++) {
    TransposeMicrokernelTester()
        .input_stride(19).output_stride(height)
        .block_width(5).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
  }
}
5826
// Block width 4; the loop bound 4 is exclusive, so only block height 3 runs.
// input_stride is 4, output_stride equals the height.
TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_3_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; height++) {
    TransposeMicrokernelTester()
        .input_stride(4).output_stride(height)
        .block_width(4).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
  }
}
5840
// Both loop bounds exclude 4, so the only case run is height 3, width 3;
// input_stride equals the width and output_stride equals the height.
TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_3_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; height++) {
    for (size_t width = 3; width < 4; width++) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
    }
  }
}
5856
// Single configuration: 2-wide, 2-high block; input_stride 4, output_stride 2.
TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_2_bw_2_is_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(2)
      .block_width(2).block_height(2)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
}
5868
// Single configuration: 2-wide, 2-high block; input_stride 2, output_stride 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_2_bw_2_os_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
}
5880
// Single configuration: 2-wide, 2-high block; input_stride 4, output_stride 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
}
5892
// 38-wide, 34-high block with input_element_stride 15 (element_size stays 4).
TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_34_bw_38_ies_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(38).output_stride(34)
      .block_width(38).block_height(34)
      .element_size(4).input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
}
5905
// 10-wide, 6-high block with output_element_stride 15 (element_size stays 4).
TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_6_bw_10_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(10).output_stride(6)
      .block_width(10).block_height(6)
      .element_size(4).output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
}
5918
// 46-wide, 14-high block; input_stride 51 and output_stride 20 both exceed the
// block dimensions, and both element strides (21 in, 17 out) are set.
TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_14_bw_46_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(51).output_stride(20)
      .block_width(46).block_height(14)
      .element_size(4)
      .input_element_stride(21).output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
}
5932 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5933
5934
5935 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single configuration: 2-wide, 2-high block; input_stride 4, output_stride 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_2_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
}
5947
// Sweeps every block height and width from 1 through 4 (inclusive);
// input_stride is width * 3 and output_stride is height * 7.
TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_1_4_bw_1_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 4; height++) {
    for (size_t width = 1; width <= 4; width++) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
    }
  }
}
5963
// Single configuration: 4-wide, 2-high block; input_stride 4, output_stride 2.
TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_2_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(2)
      .block_width(4).block_height(2)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
}
5975
// Block height 2; the loop bound 4 is exclusive, so only block width 3 runs.
// input_stride equals the width, output_stride is 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_2_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 3; width < 4; width++) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(4)
        .block_width(width).block_height(2)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
  }
}
5989
// Block height 4; the loop bound 4 is exclusive, so only block width 3 runs.
// input_stride equals the width, output_stride is 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 3; width < 4; width++) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(4)
        .block_width(width).block_height(4)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
  }
}
6003
// Single configuration: 2-wide, 4-high block; input_stride 2, output_stride 10.
TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(10)
      .block_width(2).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
}
6015
// The loop bound 4 is exclusive, so only block height 3 runs; output_stride
// equals the height. Note: despite the `bw_2` suffix in the test name, the
// block width used here is 5, with input_stride 19.
TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_3_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; height++) {
    TransposeMicrokernelTester()
        .input_stride(19).output_stride(height)
        .block_width(5).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
  }
}
6029
// Block width 4; the loop bound 4 is exclusive, so only block height 3 runs.
// input_stride is 4, output_stride equals the height.
TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_3_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; height++) {
    TransposeMicrokernelTester()
        .input_stride(4).output_stride(height)
        .block_width(4).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
  }
}
6043
// Both loop bounds exclude 4, so the only case run is height 3, width 3;
// input_stride equals the width and output_stride equals the height.
TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_3_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; height++) {
    for (size_t width = 3; width < 4; width++) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
    }
  }
}
6059
// Single configuration: 2-wide, 2-high block; input_stride 4, output_stride 2.
TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_2_bw_2_is_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(2)
      .block_width(2).block_height(2)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
}
6071
// Single configuration: 2-wide, 2-high block; input_stride 2, output_stride 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_2_bw_2_os_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
}
6083
// Single configuration: 2-wide, 2-high block; input_stride 4, output_stride 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
}
6095
// 38-wide, 34-high block with input_element_stride 15 (element_size stays 4).
TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_34_bw_38_ies_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(38).output_stride(34)
      .block_width(38).block_height(34)
      .element_size(4).input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
}
6108
// 10-wide, 6-high block with output_element_stride 15 (element_size stays 4).
TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_6_bw_10_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(10).output_stride(6)
      .block_width(10).block_height(6)
      .element_size(4).output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
}
6121
// Block of height 14 and width 46 with input stride 51 and output stride 20,
// combined with an input element stride of 21 and an output element stride
// of 17.
TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_14_bw_46_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(51)
      .output_stride(20)
      .block_width(46)
      .block_height(14)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
}
6135 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6136
6137
6138 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 2x2 block; the input and output strides are both 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_2_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
}
6150
// Sweeps every block height and block width from 1 through 4. The input
// stride is three times the width and the output stride is seven times the
// height.
TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_1_4_bw_1_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
    }
  }
}
6166
// Block of height 2 and width 4; input stride 4, output stride 2.
TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_2_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(2)
      .block_width(4)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
}
6178
// Block height 2 with the width drawn from the half-open range [3, 4), so the
// loop makes a single pass with width 3. The input stride equals the width
// and the output stride is 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_2_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(2)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
  }
}
6192
// Block height 4 with the width drawn from the half-open range [3, 4), so the
// loop makes a single pass with width 3. The input stride equals the width
// and the output stride is 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(4)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
  }
}
6206
// Block of height 4 and width 2; input stride 2, output stride 10.
TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(10)
      .block_width(2)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
}
6218
// Block height drawn from the half-open range [3, 4), so the loop makes a
// single pass with height 3; the output stride equals the height.
// NOTE(review): the test name says bw_2, but the block width used here is 5
// (with input stride 19) - confirm against the generator template.
TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_3_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(19)
        .output_stride(height)
        .block_width(5)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
  }
}
6232
// Block width 4 (input stride 4) with the height drawn from the half-open
// range [3, 4), so the loop makes a single pass with height 3. The output
// stride equals the height.
TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_3_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(4)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
  }
}
6246
// Height and width are both drawn from the half-open range [3, 4), so the
// nested loops run exactly once with a 3x3 block. The input stride equals the
// width and the output stride equals the height.
TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_3_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
    }
  }
}
6262
// 2x2 block with input stride 4 and output stride 2.
TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_2_bw_2_is_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(2)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
}
6274
// 2x2 block with input stride 2 and output stride 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_2_bw_2_os_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
}
6286
// 2x2 block with both the input stride and the output stride set to 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
}
6298
// Block of height 34 and width 38 (input stride 38, output stride 34) with an
// input element stride of 15 while the element size stays 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_34_bw_38_ies_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(38)
      .output_stride(34)
      .block_width(38)
      .block_height(34)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
}
6311
// Block of height 6 and width 10 (input stride 10, output stride 6) with an
// output element stride of 15 while the element size stays 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_6_bw_10_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(10)
      .output_stride(6)
      .block_width(10)
      .block_height(6)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
}
6324
// Block of height 14 and width 46 with input stride 51 and output stride 20,
// combined with an input element stride of 21 and an output element stride
// of 17.
TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_14_bw_46_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(51)
      .output_stride(20)
      .block_width(46)
      .block_height(14)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
}
6338 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6339
6340
6341 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 2x2 block; the input and output strides are both 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_2_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
}
6353
// Sweeps every block height and block width from 1 through 4. The input
// stride is three times the width and the output stride is seven times the
// height.
TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_1_4_bw_1_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
    }
  }
}
6369
// Block of height 2 and width 4; input stride 4, output stride 2.
TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_2_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(2)
      .block_width(4)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
}
6381
// Block height 2 with the width drawn from the half-open range [3, 4), so the
// loop makes a single pass with width 3. The input stride equals the width
// and the output stride is 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_2_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(2)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
  }
}
6395
// Block height 4 with the width drawn from the half-open range [3, 4), so the
// loop makes a single pass with width 3. The input stride equals the width
// and the output stride is 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(4)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
  }
}
6409
// Block of height 4 and width 2; input stride 2, output stride 10.
TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(10)
      .block_width(2)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
}
6421
// Block height drawn from the half-open range [3, 4), so the loop makes a
// single pass with height 3; the output stride equals the height.
// NOTE(review): the test name says bw_2, but the block width used here is 5
// (with input stride 19) - confirm against the generator template.
TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_3_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(19)
        .output_stride(height)
        .block_width(5)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
  }
}
6435
// Block width 4 (input stride 4) with the height drawn from the half-open
// range [3, 4), so the loop makes a single pass with height 3. The output
// stride equals the height.
TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_3_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(4)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
  }
}
6449
// Height and width are both drawn from the half-open range [3, 4), so the
// nested loops run exactly once with a 3x3 block. The input stride equals the
// width and the output stride equals the height.
TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_3_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
    }
  }
}
6465
// 2x2 block with input stride 4 and output stride 2.
TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_2_bw_2_is_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(2)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
}
6477
// 2x2 block with input stride 2 and output stride 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_2_bw_2_os_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
}
6489
// 2x2 block with both the input stride and the output stride set to 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
}
6501
// Block of height 34 and width 38 (input stride 38, output stride 34) with an
// input element stride of 15 while the element size stays 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_34_bw_38_ies_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(38)
      .output_stride(34)
      .block_width(38)
      .block_height(34)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
}
6514
// Block of height 6 and width 10 (input stride 10, output stride 6) with an
// output element stride of 15 while the element size stays 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_6_bw_10_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(10)
      .output_stride(6)
      .block_width(10)
      .block_height(6)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
}
6527
// Block of height 14 and width 46 with input stride 51 and output stride 20,
// combined with an input element stride of 21 and an output element stride
// of 17.
TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_14_bw_46_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(51)
      .output_stride(20)
      .block_width(46)
      .block_height(14)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
}
6541 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6542
6543
6544 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 2x2 block; the input and output strides are both 4.
TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_2_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
}
6556
// Sweeps every block height and block width from 1 through 4. The input
// stride is three times the width and the output stride is seven times the
// height.
TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_1_4_bw_1_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
    }
  }
}
6572
// Block of height 2 and width 4; input stride 4, output stride 2.
TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_2_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(2)
      .block_width(4)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
}
6584
// Block height 2 with the width drawn from the half-open range [3, 4), so the
// loop makes a single pass with width 3. The input stride equals the width
// and the output stride is 4.
TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_2_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(2)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
  }
}
6598
// Block height 4 with the width drawn from the half-open range [3, 4), so the
// loop makes a single pass with width 3. The input stride equals the width
// and the output stride is 4.
TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(4)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
  }
}
6612
// Block of height 4 and width 2; input stride 2, output stride 10.
TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(10)
      .block_width(2)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
}
6624
// Block height drawn from the half-open range [3, 4), so the loop makes a
// single pass with height 3; the output stride equals the height.
// NOTE(review): the test name says bw_2, but the block width used here is 5
// (with input stride 19) - confirm against the generator template.
TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_3_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(19)
        .output_stride(height)
        .block_width(5)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
  }
}
6638
// Block width 4 (input stride 4) with the height drawn from the half-open
// range [3, 4), so the loop makes a single pass with height 3. The output
// stride equals the height.
TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_3_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(4)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
  }
}
6652
// Height and width are both drawn from the half-open range [3, 4), so the
// nested loops run exactly once with a 3x3 block. The input stride equals the
// width and the output stride equals the height.
TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_3_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
    }
  }
}
6668
// 2x2 block with input stride 4 and output stride 2.
TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_2_bw_2_is_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(2)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
}
6680
// 2x2 block with input stride 2 and output stride 4.
TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_2_bw_2_os_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
}
6692
// 2x2 block with both the input stride and the output stride set to 4.
TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
}
6704
// Block of height 34 and width 38 (input stride 38, output stride 34) with an
// input element stride of 15 while the element size stays 4.
TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_34_bw_38_ies_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(38)
      .output_stride(34)
      .block_width(38)
      .block_height(34)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
}
6717
// Block of height 6 and width 10 (input stride 10, output stride 6) with an
// output element stride of 15 while the element size stays 4.
TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_6_bw_10_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(10)
      .output_stride(6)
      .block_width(10)
      .block_height(6)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
}
6730
// Block of height 14 and width 46 with input stride 51 and output stride 20,
// combined with an input element stride of 21 and an output element stride
// of 17.
TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_14_bw_46_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(51)
      .output_stride(20)
      .block_width(46)
      .block_height(14)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
}
6744 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6745
6746
6747 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 2x2 block; the input and output strides are both 4.
TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_2_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
}
6759
// Sweeps every block height and block width from 1 through 4. The input
// stride is three times the width and the output stride is seven times the
// height.
TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_1_4_bw_1_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
    }
  }
}
6775
// Block of height 2 and width 4; input stride 4, output stride 2.
TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_2_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(2)
      .block_width(4)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
}
6787
// Block height 2 with the width drawn from the half-open range [3, 4), so the
// loop makes a single pass with width 3. The input stride equals the width
// and the output stride is 4.
TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_2_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(2)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
  }
}
6801
// Block height 4 with the width drawn from the half-open range [3, 4), so the
// loop makes a single pass with width 3. The input stride equals the width
// and the output stride is 4.
TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(4)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
  }
}
6815
// Block of height 4 and width 2; input stride 2, output stride 10.
TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(10)
      .block_width(2)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
}
6827
// Block height drawn from the half-open range [3, 4), so the loop makes a
// single pass with height 3; the output stride equals the height.
// NOTE(review): the test name says bw_2, but the block width used here is 5
// (with input stride 19) - confirm against the generator template.
TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_3_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(19)
        .output_stride(height)
        .block_width(5)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
  }
}
6841
// Block width 4 (input stride 4) with the height drawn from the half-open
// range [3, 4), so the loop makes a single pass with height 3. The output
// stride equals the height.
TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_3_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(4)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
  }
}
6855
// Height and width are both drawn from the half-open range [3, 4), so the
// nested loops run exactly once with a 3x3 block. The input stride equals the
// width and the output stride equals the height.
TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_3_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
    }
  }
}
6871
// 2x2 block with input stride 4 and output stride 2.
TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_2_bw_2_is_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(2)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
}
6883
// 2x2 block with input stride 2 and output stride 4.
TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_2_bw_2_os_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
}
6895
// 2x2 block with both the input stride and the output stride set to 4.
TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
}
6907
// Block of height 34 and width 38 (input stride 38, output stride 34) with an
// input element stride of 15 while the element size stays 4.
TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_34_bw_38_ies_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(38)
      .output_stride(34)
      .block_width(38)
      .block_height(34)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
}
6920
// Block of height 6 and width 10 (input stride 10, output stride 6) with an
// output element stride of 15 while the element size stays 4.
TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_6_bw_10_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(10)
      .output_stride(6)
      .block_width(10)
      .block_height(6)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
}
6933
// Block of height 14 and width 46 with input stride 51 and output stride 20,
// combined with an input element stride of 21 and an output element stride
// of 17.
TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_14_bw_46_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(51)
      .output_stride(20)
      .block_width(46)
      .block_height(14)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
}
6947 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6948
6949
6950 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 2x2 block; the input and output strides are both 4.
TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_2_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
}
6962
// Sweeps every block height and block width from 1 through 4. The input
// stride is three times the width and the output stride is seven times the
// height.
TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_1_4_bw_1_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
    }
  }
}
6978
// Block of height 2 and width 4; input stride 4, output stride 2.
TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_2_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(2)
      .block_width(4)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
}
6990
// Block height 2 with the width drawn from the half-open range [3, 4), so the
// loop makes a single pass with width 3. The input stride equals the width
// and the output stride is 4.
TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_2_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(2)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
  }
}
7004
// Block height 4 with the width drawn from the half-open range [3, 4), so the
// loop makes a single pass with width 3. The input stride equals the width
// and the output stride is 4.
TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(4)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
  }
}
7018
// Block of height 4 and width 2; input stride 2, output stride 10.
TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(10)
      .block_width(2)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
}
7030
// Block height drawn from the half-open range [3, 4), so the loop makes a
// single pass with height 3; the output stride equals the height.
// NOTE(review): the test name says bw_2, but the block width used here is 5
// (with input stride 19) - confirm against the generator template.
TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_3_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(19)
        .output_stride(height)
        .block_width(5)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
  }
}
7044
// Block width 4 (input stride 4) with the height drawn from the half-open
// range [3, 4), so the loop makes a single pass with height 3. The output
// stride equals the height.
TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_3_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(4)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
  }
}
7058
// Height and width are both drawn from the half-open range [3, 4), so the
// nested loops run exactly once with a 3x3 block. The input stride equals the
// width and the output stride equals the height.
TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_3_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
    }
  }
}
7074
// 2x2 block with input stride 4 and output stride 2.
TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_2_bw_2_is_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(2)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
}
7086
// 2x2 block with input stride 2 and output stride 4.
TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_2_bw_2_os_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
}
7098
// 2x2 block with both the input stride and the output stride set to 4.
TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
}
7110
// Block of height 34 and width 38 with input_element_stride overridden to 15;
// input stride 38, output stride 34.
TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_34_bw_38_ies_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(38)
      .output_stride(34)
      .block_width(38)
      .block_height(34)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
}
7123
// Block of height 6 and width 10 with output_element_stride overridden to 15;
// input stride 10, output stride 6.
TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_6_bw_10_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(10)
      .output_stride(6)
      .block_width(10)
      .block_height(6)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
}
7136
// Block of height 14 and width 46 with both element strides overridden
// (input 21, output 17); input stride 51, output stride 20.
TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_14_bw_46_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(51)
      .output_stride(20)
      .block_width(46)
      .block_height(14)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
}
7150 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7151
7152
7153 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// One 2x2 block; input and output strides are both 4.
TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_2_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
}
7165
// Sweeps every height/width pair in [1, 4] x [1, 4] (bounds inclusive); the
// input stride is three times the width and the output stride seven times the
// height.
TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_1_4_bw_1_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
    }
  }
}
7181
// One block of height 2 and width 4; input stride 4, output stride 2.
TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_2_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(2)
      .block_width(4)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
}
7193
// Height 2 with widths in [3, 4) (the bound 4 is exclusive); the input stride
// equals the width and the output stride is 4.
TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_2_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(2)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
  }
}
7207
// Height 4 with widths in [3, 4) (the bound 4 is exclusive); the input stride
// equals the width and the output stride is 4.
TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(4)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
  }
}
7221
// One block of height 4 and width 2; input stride 2, output stride 10.
TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(10)
      .block_width(2)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
}
7233
// Heights in [3, 4) (the bound 4 is exclusive) with the output stride equal to
// the height.
// NOTE(review): the case name says bw_2, yet block_width is 5 and input_stride
// is 19 -- presumably what the generator intends; confirm against the spec.
TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_3_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(19)
        .output_stride(height)
        .block_width(5)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
  }
}
7247
// Heights in [3, 4) (the bound 4 is exclusive) at block width 4; the input
// stride is 4 and the output stride equals the height.
TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_3_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(4)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
  }
}
7261
// Heights in [3, 4) crossed with widths in [3, 4); the input stride equals
// the width and the output stride equals the height.
TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_3_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
    }
  }
}
7277
// One 2x2 block with input stride 4 and output stride 2.
TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_2_bw_2_is_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(2)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
}
7289
// One 2x2 block with input stride 2 and output stride 4.
TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_2_bw_2_os_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
}
7301
// One 2x2 block with both the input and the output stride set to 4 (the same
// parameter values as the bh_2_bw_2 case of this suite).
TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
}
7313
// Block of height 34 and width 38 with input_element_stride overridden to 15;
// input stride 38, output stride 34.
TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_34_bw_38_ies_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(38)
      .output_stride(34)
      .block_width(38)
      .block_height(34)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
}
7326
// Block of height 6 and width 10 with output_element_stride overridden to 15;
// input stride 10, output stride 6.
TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_6_bw_10_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(10)
      .output_stride(6)
      .block_width(10)
      .block_height(6)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
}
7339
// Block of height 14 and width 46 with both element strides overridden
// (input 21, output 17); input stride 51, output stride 20.
TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_14_bw_46_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(51)
      .output_stride(20)
      .block_width(46)
      .block_height(14)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
}
7353 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7354
7355
7356 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// One 4x4 block; input and output strides are both 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
}
7368
// Sweeps every height/width pair in [1, 8] x [1, 8] (bounds inclusive); the
// input stride is three times the width and the output stride seven times the
// height.
TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
    }
  }
}
7384
// One block of height 4 and width 8; input stride 8, output stride 4.
TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(8)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
}
7396
// Height 4 with widths 5..7 (the loop bound 8 is exclusive); the input stride
// equals the width and the output stride is 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(4)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
  }
}
7410
// Height 8 with widths 5..7 (the loop bound 8 is exclusive); the input stride
// equals the width and the output stride is 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(8)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
  }
}
7424
// One block of height 8 and width 4; input stride 4, output stride 16.
TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(16)
      .block_width(4)
      .block_height(8)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
}
7436
// Heights 5..7 (the loop bound 8 is exclusive) with the output stride equal to
// the height.
// NOTE(review): the case name says bw_4, yet block_width is 7 and input_stride
// is 21 -- presumably what the generator intends; confirm against the spec.
TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(21)
        .output_stride(height)
        .block_width(7)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
  }
}
7450
// Heights 5..7 (the loop bound 8 is exclusive) at block width 8; the input
// stride is 8 and the output stride equals the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(8)
        .output_stride(height)
        .block_width(8)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
  }
}
7464
// Heights 5..7 crossed with widths 5..7 (both loop bounds of 8 are exclusive);
// the input stride equals the width and the output stride equals the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
    }
  }
}
7480
// One 4x4 block with input stride 8 and output stride 4.
TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
}
7492
// One 4x4 block with input stride 4 and output stride 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
}
7504
// One 4x4 block with both the input and the output stride set to 8 (the same
// parameter values as the bh_4_bw_4 case of this suite).
TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
}
7516
// Block of height 68 and width 76 with input_element_stride overridden to 15;
// input stride 76, output stride 68.
TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(76)
      .output_stride(68)
      .block_width(76)
      .block_height(68)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
}
7529
// Block of height 12 and width 20 with output_element_stride overridden to 15;
// input stride 20, output stride 12.
TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(20)
      .output_stride(12)
      .block_width(20)
      .block_height(12)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
}
7542
// Block of height 28 and width 92 with both element strides overridden
// (input 21, output 17); input stride 97, output stride 34.
TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(97)
      .output_stride(34)
      .block_width(92)
      .block_height(28)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
}
7556 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7557
7558
7559 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// One 4x4 block; input and output strides are both 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
}
7571
// Sweeps every height/width pair in [1, 8] x [1, 8] (bounds inclusive); the
// input stride is three times the width and the output stride seven times the
// height.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
    }
  }
}
7587
// One block of height 4 and width 8; input stride 8, output stride 4.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(8)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
}
7599
// Height 4 with widths 5..7 (the loop bound 8 is exclusive); the input stride
// equals the width and the output stride is 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(4)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
  }
}
7613
// Height 8 with widths 5..7 (the loop bound 8 is exclusive); the input stride
// equals the width and the output stride is 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(8)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
  }
}
7627
// One block of height 8 and width 4; input stride 4, output stride 16.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(16)
      .block_width(4)
      .block_height(8)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
}
7639
// Heights 5..7 (the loop bound 8 is exclusive) with the output stride equal to
// the height.
// NOTE(review): the case name says bw_4, yet block_width is 7 and input_stride
// is 21 -- presumably what the generator intends; confirm against the spec.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(21)
        .output_stride(height)
        .block_width(7)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
  }
}
7653
// Heights 5..7 (the loop bound 8 is exclusive) at block width 8; the input
// stride is 8 and the output stride equals the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(8)
        .output_stride(height)
        .block_width(8)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
  }
}
7667
// Heights 5..7 crossed with widths 5..7 (both loop bounds of 8 are exclusive);
// the input stride equals the width and the output stride equals the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
    }
  }
}
7683
// One 4x4 block with input stride 8 and output stride 4.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
}
7695
// One 4x4 block with input stride 4 and output stride 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
}
7707
// One 4x4 block with both the input and the output stride set to 8 (the same
// parameter values as the bh_4_bw_4 case of this suite).
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
}
7719
// Block of height 68 and width 76 with input_element_stride overridden to 15;
// input stride 76, output stride 68.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(76)
      .output_stride(68)
      .block_width(76)
      .block_height(68)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
}
7732
// Block of height 12 and width 20 with output_element_stride overridden to 15;
// input stride 20, output stride 12.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(20)
      .output_stride(12)
      .block_width(20)
      .block_height(12)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
}
7745
// Block of height 28 and width 92 with both element strides overridden
// (input 21, output 17); input stride 97, output stride 34.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(97)
      .output_stride(34)
      .block_width(92)
      .block_height(28)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
}
7759 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7760
7761
7762 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// One 4x4 block; input and output strides are both 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
}
7774
// Sweeps every height/width pair in [1, 8] x [1, 8] (bounds inclusive); the
// input stride is three times the width and the output stride seven times the
// height.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
    }
  }
}
7790
// One block of height 4 and width 8; input stride 8, output stride 4.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(8)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
}
7802
// Height 4 with widths 5..7 (the loop bound 8 is exclusive); the input stride
// equals the width and the output stride is 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(4)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
  }
}
7816
// Height 8 with widths 5..7 (the loop bound 8 is exclusive); the input stride
// equals the width and the output stride is 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(8)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
  }
}
7830
// One block of height 8 and width 4; input stride 4, output stride 16.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(16)
      .block_width(4)
      .block_height(8)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
}
7842
// Heights 5..7 (the loop bound 8 is exclusive) with the output stride equal to
// the height.
// NOTE(review): the case name says bw_4, yet block_width is 7 and input_stride
// is 21 -- presumably what the generator intends; confirm against the spec.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(21)
        .output_stride(height)
        .block_width(7)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
  }
}
7856
// Heights 5..7 (the loop bound 8 is exclusive) at block width 8; the input
// stride is 8 and the output stride equals the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(8)
        .output_stride(height)
        .block_width(8)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
  }
}
7870
// Heights 5..7 crossed with widths 5..7 (both loop bounds of 8 are exclusive);
// the input stride equals the width and the output stride equals the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
    }
  }
}
7886
// One 4x4 block with input stride 8 and output stride 4.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
}
7898
// One 4x4 block with input stride 4 and output stride 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
}
7910
// One 4x4 block with both the input and the output stride set to 8 (the same
// parameter values as the bh_4_bw_4 case of this suite).
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
}
7922
// Block of height 68 and width 76 with input_element_stride overridden to 15;
// input stride 76, output stride 68.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(76)
      .output_stride(68)
      .block_width(76)
      .block_height(68)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
}
7935
// Block of height 12 and width 20 with output_element_stride overridden to 15;
// input stride 20, output stride 12.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(20)
      .output_stride(12)
      .block_width(20)
      .block_height(12)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
}
7948
// Block of height 28 and width 92 with both element strides overridden
// (input 21, output 17); input stride 97, output stride 34.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(97)
      .output_stride(34)
      .block_width(92)
      .block_height(28)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
}
7962 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7963
7964
7965 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// One 4x4 block; input and output strides are both 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
}
7977
// Sweeps every height/width pair in [1, 8] x [1, 8] (bounds inclusive); the
// input stride is three times the width and the output stride seven times the
// height.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
    }
  }
}
7993
// One block of height 4 and width 8; input stride 8, output stride 4.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(8)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
}
8005
// Height 4 with widths 5..7 (the loop bound 8 is exclusive); the input stride
// equals the width and the output stride is 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(4)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
  }
}
8019
// Height 8 with widths 5..7 (the loop bound 8 is exclusive); the input stride
// equals the width and the output stride is 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(8)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
  }
}
8033
// One block of height 8 and width 4; input stride 4, output stride 16.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(16)
      .block_width(4)
      .block_height(8)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
}
8045
// Heights 5..7 (the loop bound 8 is exclusive) with the output stride equal to
// the height.
// NOTE(review): the case name says bw_4, yet block_width is 7 and input_stride
// is 21 -- presumably what the generator intends; confirm against the spec.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(21)
        .output_stride(height)
        .block_width(7)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
  }
}
8059
// Heights 5..7 (the loop bound 8 is exclusive) at block width 8; the input
// stride is 8 and the output stride equals the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(8)
        .output_stride(height)
        .block_width(8)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
  }
}
8073
// Heights 5..7 crossed with widths 5..7 (both loop bounds of 8 are exclusive);
// the input stride equals the width and the output stride equals the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
    }
  }
}
8089
// Single 4x4 case where input_stride (8) exceeds block_width (4) and
// output_stride equals block_height (4).
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;

  const size_t in_stride = 8;
  const size_t out_stride = 4;

  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
}
8101
// Single 4x4 case where output_stride (8) exceeds block_height (4) and
// input_stride equals block_width (4).
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;

  const size_t in_stride = 4;
  const size_t out_stride = 8;

  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
}
8113
// Single 4x4 case where both input_stride and output_stride are 8,
// i.e. larger than the block dimensions.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;

  const size_t in_stride = 8;
  const size_t out_stride = 8;

  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
}
8125
// Larger block (block_width 76, block_height 68) with a non-default
// input_element_stride of 15; strides equal the block dimensions.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 76;
  const size_t height = 68;

  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
}
8138
// Block of block_width 20 and block_height 12 with a non-default
// output_element_stride of 15; strides equal the block dimensions.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 20;
  const size_t height = 12;

  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
}
8151
// Block of block_width 92 and block_height 28 with input_stride 97 and
// output_stride 34, plus non-default element strides (input 21, output 17).
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 92;
  const size_t height = 28;

  TransposeMicrokernelTester()
      .input_stride(97)
      .output_stride(34)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
}
8165 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8166
8167
8168 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single case: block_width 4, block_height 4, input_stride 8,
// output_stride 8, element_size 4, one iteration.
TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 4;
  const size_t height = 4;

  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
8180
// Covers every (block_height, block_width) pair with both values in 1..8
// inclusive. input_stride is 3x the width and output_stride 7x the height.
TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t height = 1; height < 9; height++) {
    for (size_t width = 1; width < 9; width++) {
      TransposeMicrokernelTester()
          .input_stride(3 * width)
          .output_stride(7 * height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
    }
  }
}
8196
// Single case: block_width 8, block_height 4, input_stride 8,
// output_stride 4, element_size 4, one iteration.
TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 8;
  const size_t height = 4;

  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
8208
// Sweeps block_width over 5..7 (8 itself is excluded) at block_height 4.
// input_stride follows the width; output_stride is fixed at 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;

  const size_t height = 4;
  for (size_t width = 5; width <= 7; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
  }
}
8222
// Sweeps block_width over 5..7 (8 itself is excluded) at block_height 8.
// input_stride follows the width; output_stride is fixed at 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;

  const size_t height = 8;
  for (size_t width = 5; width <= 7; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
  }
}
8236
// Single case: block_width 4, block_height 8, input_stride 4,
// output_stride 16, element_size 4, one iteration.
TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 4;
  const size_t height = 8;

  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(16)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
8248
// Sweeps block_height over 5..7 (8 itself is excluded); output_stride follows
// the height while block_width stays at 7 and input_stride at 21.
// NOTE(review): the test name says bw_4 but the width used is 7 -- presumably
// inherited from the generator template; confirm before changing either.
TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 7;
  for (size_t height = 5; height <= 7; height++) {
    TransposeMicrokernelTester()
        .input_stride(21)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
  }
}
8262
// Sweeps block_height over 5..7 (8 itself is excluded) at block_width 8.
// output_stride follows the height; input_stride is fixed at 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 8;
  for (size_t height = 5; height <= 7; height++) {
    TransposeMicrokernelTester()
        .input_stride(8)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
  }
}
8276
// Covers every (block_height, block_width) pair with both values in 5..7
// (8 itself is excluded). Strides equal the matching block dimension.
TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t height = 5; height <= 7; height++) {
    for (size_t width = 5; width <= 7; width++) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
    }
  }
}
8292
// Single 4x4 case where input_stride (8) exceeds block_width (4) and
// output_stride equals block_height (4).
TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;

  const size_t in_stride = 8;
  const size_t out_stride = 4;

  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
8304
// Single 4x4 case where output_stride (8) exceeds block_height (4) and
// input_stride equals block_width (4).
TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;

  const size_t in_stride = 4;
  const size_t out_stride = 8;

  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
8316
// Single 4x4 case where both input_stride and output_stride are 8,
// i.e. larger than the block dimensions.
TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;

  const size_t in_stride = 8;
  const size_t out_stride = 8;

  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
8328
// Larger block (block_width 76, block_height 68) with a non-default
// input_element_stride of 15; strides equal the block dimensions.
TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 76;
  const size_t height = 68;

  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
8341
// Block of block_width 20 and block_height 12 with a non-default
// output_element_stride of 15; strides equal the block dimensions.
TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 20;
  const size_t height = 12;

  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
8354
// Block of block_width 92 and block_height 28 with input_stride 97 and
// output_stride 34, plus non-default element strides (input 21, output 17).
TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 92;
  const size_t height = 28;

  TransposeMicrokernelTester()
      .input_stride(97)
      .output_stride(34)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
8368 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8369
8370
8371 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single case: block_width 4, block_height 4, input_stride 8,
// output_stride 8, element_size 4, one iteration.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 4;
  const size_t height = 4;

  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
}
8383
// Covers every (block_height, block_width) pair with both values in 1..8
// inclusive. input_stride is 3x the width and output_stride 7x the height.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t height = 1; height < 9; height++) {
    for (size_t width = 1; width < 9; width++) {
      TransposeMicrokernelTester()
          .input_stride(3 * width)
          .output_stride(7 * height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
    }
  }
}
8399
// Single case: block_width 8, block_height 4, input_stride 8,
// output_stride 4, element_size 4, one iteration.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 8;
  const size_t height = 4;

  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
}
8411
// Sweeps block_width over 5..7 (8 itself is excluded) at block_height 4.
// input_stride follows the width; output_stride is fixed at 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;

  const size_t height = 4;
  for (size_t width = 5; width <= 7; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
  }
}
8425
// Sweeps block_width over 5..7 (8 itself is excluded) at block_height 8.
// input_stride follows the width; output_stride is fixed at 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;

  const size_t height = 8;
  for (size_t width = 5; width <= 7; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
  }
}
8439
// Single case: block_width 4, block_height 8, input_stride 4,
// output_stride 16, element_size 4, one iteration.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 4;
  const size_t height = 8;

  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(16)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
}
8451
// Sweeps block_height over 5..7 (8 itself is excluded); output_stride follows
// the height while block_width stays at 7 and input_stride at 21.
// NOTE(review): the test name says bw_4 but the width used is 7 -- presumably
// inherited from the generator template; confirm before changing either.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 7;
  for (size_t height = 5; height <= 7; height++) {
    TransposeMicrokernelTester()
        .input_stride(21)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
  }
}
8465
// Sweeps block_height over 5..7 (8 itself is excluded) at block_width 8.
// output_stride follows the height; input_stride is fixed at 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 8;
  for (size_t height = 5; height <= 7; height++) {
    TransposeMicrokernelTester()
        .input_stride(8)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
  }
}
8479
// Covers every (block_height, block_width) pair with both values in 5..7
// (8 itself is excluded). Strides equal the matching block dimension.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t height = 5; height <= 7; height++) {
    for (size_t width = 5; width <= 7; width++) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
    }
  }
}
8495
// Single 4x4 case where input_stride (8) exceeds block_width (4) and
// output_stride equals block_height (4).
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;

  const size_t in_stride = 8;
  const size_t out_stride = 4;

  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
}
8507
// Single 4x4 case where output_stride (8) exceeds block_height (4) and
// input_stride equals block_width (4).
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;

  const size_t in_stride = 4;
  const size_t out_stride = 8;

  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
}
8519
// Single 4x4 case where both input_stride and output_stride are 8,
// i.e. larger than the block dimensions.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;

  const size_t in_stride = 8;
  const size_t out_stride = 8;

  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
}
8531
// Larger block (block_width 76, block_height 68) with a non-default
// input_element_stride of 15; strides equal the block dimensions.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 76;
  const size_t height = 68;

  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
}
8544
// Block of block_width 20 and block_height 12 with a non-default
// output_element_stride of 15; strides equal the block dimensions.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 20;
  const size_t height = 12;

  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
}
8557
// Block of block_width 92 and block_height 28 with input_stride 97 and
// output_stride 34, plus non-default element strides (input 21, output 17).
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 92;
  const size_t height = 28;

  TransposeMicrokernelTester()
      .input_stride(97)
      .output_stride(34)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
}
8571 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8572
8573
8574 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single case: block_width 4, block_height 4, input_stride 8,
// output_stride 8, element_size 4, one iteration.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 4;
  const size_t height = 4;

  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
}
8586
// Covers every (block_height, block_width) pair with both values in 1..8
// inclusive. input_stride is 3x the width and output_stride 7x the height.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t height = 1; height < 9; height++) {
    for (size_t width = 1; width < 9; width++) {
      TransposeMicrokernelTester()
          .input_stride(3 * width)
          .output_stride(7 * height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
    }
  }
}
8602
// Single case: block_width 8, block_height 4, input_stride 8,
// output_stride 4, element_size 4, one iteration.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 8;
  const size_t height = 4;

  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
}
8614
// Sweeps block_width over 5..7 (8 itself is excluded) at block_height 4.
// input_stride follows the width; output_stride is fixed at 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;

  const size_t height = 4;
  for (size_t width = 5; width <= 7; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
  }
}
8628
// Sweeps block_width over 5..7 (8 itself is excluded) at block_height 8.
// input_stride follows the width; output_stride is fixed at 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;

  const size_t height = 8;
  for (size_t width = 5; width <= 7; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
  }
}
8642
// Single case: block_width 4, block_height 8, input_stride 4,
// output_stride 16, element_size 4, one iteration.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 4;
  const size_t height = 8;

  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(16)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
}
8654
// Sweeps block_height over 5..7 (8 itself is excluded); output_stride follows
// the height while block_width stays at 7 and input_stride at 21.
// NOTE(review): the test name says bw_4 but the width used is 7 -- presumably
// inherited from the generator template; confirm before changing either.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 7;
  for (size_t height = 5; height <= 7; height++) {
    TransposeMicrokernelTester()
        .input_stride(21)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
  }
}
8668
// Sweeps block_height over 5..7 (8 itself is excluded) at block_width 8.
// output_stride follows the height; input_stride is fixed at 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 8;
  for (size_t height = 5; height <= 7; height++) {
    TransposeMicrokernelTester()
        .input_stride(8)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
  }
}
8682
// Covers every (block_height, block_width) pair with both values in 5..7
// (8 itself is excluded). Strides equal the matching block dimension.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t height = 5; height <= 7; height++) {
    for (size_t width = 5; width <= 7; width++) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
    }
  }
}
8698
// Single 4x4 case where input_stride (8) exceeds block_width (4) and
// output_stride equals block_height (4).
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;

  const size_t in_stride = 8;
  const size_t out_stride = 4;

  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
}
8710
// Single 4x4 case where output_stride (8) exceeds block_height (4) and
// input_stride equals block_width (4).
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;

  const size_t in_stride = 4;
  const size_t out_stride = 8;

  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
}
8722
// Single 4x4 case where both input_stride and output_stride are 8,
// i.e. larger than the block dimensions.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;

  const size_t in_stride = 8;
  const size_t out_stride = 8;

  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
}
8734
// Larger block (block_width 76, block_height 68) with a non-default
// input_element_stride of 15; strides equal the block dimensions.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 76;
  const size_t height = 68;

  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
}
8747
// Block of block_width 20 and block_height 12 with a non-default
// output_element_stride of 15; strides equal the block dimensions.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 20;
  const size_t height = 12;

  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
}
8760
// Block of block_width 92 and block_height 28 with input_stride 97 and
// output_stride 34, plus non-default element strides (input 21, output 17).
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 92;
  const size_t height = 28;

  TransposeMicrokernelTester()
      .input_stride(97)
      .output_stride(34)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
}
8774 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8775
8776
8777 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single case: block_width 4, block_height 4, input_stride 8,
// output_stride 8, element_size 4, one iteration.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 4;
  const size_t height = 4;

  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
}
8789
// Covers every (block_height, block_width) pair with both values in 1..8
// inclusive. input_stride is 3x the width and output_stride 7x the height.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t height = 1; height < 9; height++) {
    for (size_t width = 1; width < 9; width++) {
      TransposeMicrokernelTester()
          .input_stride(3 * width)
          .output_stride(7 * height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
    }
  }
}
8805
// Single case: block_width 8, block_height 4, input_stride 8,
// output_stride 4, element_size 4, one iteration.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 8;
  const size_t height = 4;

  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
}
8817
// Sweeps block_width over 5..7 (8 itself is excluded) at block_height 4.
// input_stride follows the width; output_stride is fixed at 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;

  const size_t height = 4;
  for (size_t width = 5; width <= 7; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
  }
}
8831
// Sweeps block_width over 5..7 (8 itself is excluded) at block_height 8.
// input_stride follows the width; output_stride is fixed at 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;

  const size_t height = 8;
  for (size_t width = 5; width <= 7; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
  }
}
8845
// Single case: block_width 4, block_height 8, input_stride 4,
// output_stride 16, element_size 4, one iteration.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 4;
  const size_t height = 8;

  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(16)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
}
8857
// Sweeps block_height over 5..7 (8 itself is excluded); output_stride follows
// the height while block_width stays at 7 and input_stride at 21.
// NOTE(review): the test name says bw_4 but the width used is 7 -- presumably
// inherited from the generator template; confirm before changing either.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 7;
  for (size_t height = 5; height <= 7; height++) {
    TransposeMicrokernelTester()
        .input_stride(21)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
  }
}
8871
// Sweeps block_height over 5..7 (8 itself is excluded) at block_width 8.
// output_stride follows the height; input_stride is fixed at 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 8;
  for (size_t height = 5; height <= 7; height++) {
    TransposeMicrokernelTester()
        .input_stride(8)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
  }
}
8885
// Covers every (block_height, block_width) pair with both values in 5..7
// (8 itself is excluded). Strides equal the matching block dimension.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t height = 5; height <= 7; height++) {
    for (size_t width = 5; width <= 7; width++) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
    }
  }
}
8901
// Single 4x4 case where input_stride (8) exceeds block_width (4) and
// output_stride equals block_height (4).
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;

  const size_t in_stride = 8;
  const size_t out_stride = 4;

  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
}
8913
// Single 4x4 case where output_stride (8) exceeds block_height (4) and
// input_stride equals block_width (4).
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;

  const size_t in_stride = 4;
  const size_t out_stride = 8;

  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
}
8925
// Single 4x4 case where both input_stride and output_stride are 8,
// i.e. larger than the block dimensions.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;

  const size_t in_stride = 8;
  const size_t out_stride = 8;

  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
}
8937
// Larger block (block_width 76, block_height 68) with a non-default
// input_element_stride of 15; strides equal the block dimensions.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 76;
  const size_t height = 68;

  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
}
8950
// Block of block_width 20 and block_height 12 with a non-default
// output_element_stride of 15; strides equal the block dimensions.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 20;
  const size_t height = 12;

  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
}
8963
// Block of block_width 92 and block_height 28 with input_stride 97 and
// output_stride 34, plus non-default element strides (input 21, output 17).
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;

  const size_t width = 92;
  const size_t height = 28;

  TransposeMicrokernelTester()
      .input_stride(97)
      .output_stride(34)
      .block_width(width)
      .block_height(height)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
}
8977 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8978