// Source: /aosp_15_r20/external/XNNPACK/test/x32-transpose.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
// Copyright 2021 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/x32-transpose.yaml
//   Generator: tools/generate-transpose-test.py
9 
10 
#include <cstddef>

#include <gtest/gtest.h>

#include <xnnpack/common.h>
#include <xnnpack/isa-checks.h>

#include <xnnpack/transpose.h>
#include "transpose-microkernel-tester.h"
18 
19 
// Single tester run: block_height=1, block_width=2, input_stride=4, output_stride=2.
TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_1_bw_2) {
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(2)
    .block_width(2)
    .block_height(1)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
}
30 
// Sweeps block_height over [1, 2] and block_width over [1, 4];
// input_stride is 3 * width and output_stride is 7 * height.
TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_1_2_bw_1_4) {
  for (size_t bh = 1; bh <= 2; ++bh) {
    for (size_t bw = 1; bw <= 4; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw * 3)
        .output_stride(bh * 7)
        .block_width(bw)
        .block_height(bh)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
    }
  }
}
45 
// Single tester run: block_height=1, block_width=4, input_stride=4, output_stride=1.
TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_1_bw_4) {
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(1)
    .block_width(4)
    .block_height(1)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
}
56 
// Loop covers block_width in [3, 4), i.e. only 3; input_stride equals the
// width, block_height is 1 and output_stride is 2.
TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_1_bw_3_4) {
  for (size_t bw = 3; bw < 4; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(2)
      .block_width(bw)
      .block_height(1)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
  }
}
69 
// Loop covers block_width in [3, 4), i.e. only 3; input_stride equals the
// width, block_height is 2 and output_stride is 2.
TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_2_bw_3_4) {
  for (size_t bw = 3; bw < 4; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(2)
      .block_width(bw)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
  }
}
82 
// Single tester run: block_height=2, block_width=2, input_stride=2, output_stride=7.
TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_2_bw_2) {
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(7)
    .block_width(2)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
}
93 
// Intended to vary block_height (also used as output_stride) with
// block_width=5 and input_stride=19.
// NOTE(review): the range [2, 2) is empty, so the tester never runs and this
// test passes vacuously; confirm against tools/generate-transpose-test.py
// before changing the bounds.
// NOTE(review): the test name says bw_2 but block_width is 5 - confirm.
TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_2_2_bw_2) {
  for (size_t bh = 2; bh < 2; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(19)
      .output_stride(bh)
      .block_width(5)
      .block_height(bh)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
  }
}
106 
// Intended to vary block_height (also used as output_stride) with
// block_width=4 and input_stride=4.
// NOTE(review): the range [2, 2) is empty, so the tester never runs and this
// test passes vacuously; confirm against tools/generate-transpose-test.py
// before changing the bounds.
TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_2_2_bw_4) {
  for (size_t bh = 2; bh < 2; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(bh)
      .block_width(4)
      .block_height(bh)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
  }
}
119 
// Intended to vary block_height (outer) and block_width (inner, [3, 4));
// strides equal the corresponding block dimension.
// NOTE(review): the outer range [2, 2) is empty, so the tester never runs and
// this test passes vacuously; confirm against
// tools/generate-transpose-test.py before changing the bounds.
TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_2_2_bw_3_4) {
  for (size_t bh = 2; bh < 2; ++bh) {
    for (size_t bw = 3; bw < 4; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
    }
  }
}
134 
// Single tester run with input_stride (4) larger than block_width (2);
// block_height=1, output_stride=1.
TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_1_bw_2_is_4) {
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(1)
    .block_width(2)
    .block_height(1)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
}
145 
// Single tester run with output_stride (2) larger than block_height (1);
// block_width=2, input_stride=2.
TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_1_bw_2_os_2) {
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(2)
    .block_width(2)
    .block_height(1)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
}
156 
// Single tester run with both strides larger than the block:
// block_height=1, block_width=2, input_stride=4, output_stride=2.
TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_1_bw_2_is_4_os_2) {
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(2)
    .block_width(2)
    .block_height(1)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
}
167 
// Single tester run: block_height=17, block_width=38 with
// input_element_stride=15; strides equal the block dimensions.
TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_17_bw_38_ies_15) {
  TransposeMicrokernelTester()
    .input_stride(38)
    .output_stride(17)
    .block_width(38)
    .block_height(17)
    .element_size(4)
    .input_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
}
179 
// Single tester run: block_height=3, block_width=10 with
// output_element_stride=15; strides equal the block dimensions.
TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_3_bw_10_oes_15) {
  TransposeMicrokernelTester()
    .input_stride(10)
    .output_stride(3)
    .block_width(10)
    .block_height(3)
    .element_size(4)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
}
191 
// Single tester run: block_height=7, block_width=46, input_stride=51,
// output_stride=13, input_element_stride=21, output_element_stride=17.
TEST(X32_TRANSPOSEC__1X2_SCALAR_FLOAT_4, bh_7_bw_46_ies_21_oes_17) {
  TransposeMicrokernelTester()
    .input_stride(51)
    .output_stride(13)
    .block_width(46)
    .block_height(7)
    .element_size(4)
    .input_element_stride(21)
    .output_element_stride(17)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x2_scalar_float);
}
204 
// Single tester run: block_height=1, block_width=2, input_stride=4, output_stride=2.
TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_1_bw_2) {
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(2)
    .block_width(2)
    .block_height(1)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
}
215 
// Sweeps block_height over [1, 2] and block_width over [1, 4];
// input_stride is 3 * width and output_stride is 7 * height.
TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_1_2_bw_1_4) {
  for (size_t bh = 1; bh <= 2; ++bh) {
    for (size_t bw = 1; bw <= 4; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw * 3)
        .output_stride(bh * 7)
        .block_width(bw)
        .block_height(bh)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
    }
  }
}
230 
// Single tester run: block_height=1, block_width=4, input_stride=4, output_stride=1.
TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_1_bw_4) {
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(1)
    .block_width(4)
    .block_height(1)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
}
241 
// Loop covers block_width in [3, 4), i.e. only 3; input_stride equals the
// width, block_height is 1 and output_stride is 2.
TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_1_bw_3_4) {
  for (size_t bw = 3; bw < 4; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(2)
      .block_width(bw)
      .block_height(1)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
  }
}
254 
// Loop covers block_width in [3, 4), i.e. only 3; input_stride equals the
// width, block_height is 2 and output_stride is 2.
TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_2_bw_3_4) {
  for (size_t bw = 3; bw < 4; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(2)
      .block_width(bw)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
  }
}
267 
// Single tester run: block_height=2, block_width=2, input_stride=2, output_stride=7.
TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_2_bw_2) {
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(7)
    .block_width(2)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
}
278 
// Intended to vary block_height (also used as output_stride) with
// block_width=5 and input_stride=19.
// NOTE(review): the range [2, 2) is empty, so the tester never runs and this
// test passes vacuously; confirm against tools/generate-transpose-test.py
// before changing the bounds.
// NOTE(review): the test name says bw_2 but block_width is 5 - confirm.
TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_2_2_bw_2) {
  for (size_t bh = 2; bh < 2; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(19)
      .output_stride(bh)
      .block_width(5)
      .block_height(bh)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
  }
}
291 
// Intended to vary block_height (also used as output_stride) with
// block_width=4 and input_stride=4.
// NOTE(review): the range [2, 2) is empty, so the tester never runs and this
// test passes vacuously; confirm against tools/generate-transpose-test.py
// before changing the bounds.
TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_2_2_bw_4) {
  for (size_t bh = 2; bh < 2; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(bh)
      .block_width(4)
      .block_height(bh)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
  }
}
304 
// Intended to vary block_height (outer) and block_width (inner, [3, 4));
// strides equal the corresponding block dimension.
// NOTE(review): the outer range [2, 2) is empty, so the tester never runs and
// this test passes vacuously; confirm against
// tools/generate-transpose-test.py before changing the bounds.
TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_2_2_bw_3_4) {
  for (size_t bh = 2; bh < 2; ++bh) {
    for (size_t bw = 3; bw < 4; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
    }
  }
}
319 
// Single tester run with input_stride (4) larger than block_width (2);
// block_height=1, output_stride=1.
TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_1_bw_2_is_4) {
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(1)
    .block_width(2)
    .block_height(1)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
}
330 
// Single tester run with output_stride (2) larger than block_height (1);
// block_width=2, input_stride=2.
TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_1_bw_2_os_2) {
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(2)
    .block_width(2)
    .block_height(1)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
}
341 
// Single tester run with both strides larger than the block:
// block_height=1, block_width=2, input_stride=4, output_stride=2.
TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_1_bw_2_is_4_os_2) {
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(2)
    .block_width(2)
    .block_height(1)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
}
352 
// Single tester run: block_height=17, block_width=38 with
// input_element_stride=15; strides equal the block dimensions.
TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_17_bw_38_ies_15) {
  TransposeMicrokernelTester()
    .input_stride(38)
    .output_stride(17)
    .block_width(38)
    .block_height(17)
    .element_size(4)
    .input_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
}
364 
// Single tester run: block_height=3, block_width=10 with
// output_element_stride=15; strides equal the block dimensions.
TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_3_bw_10_oes_15) {
  TransposeMicrokernelTester()
    .input_stride(10)
    .output_stride(3)
    .block_width(10)
    .block_height(3)
    .element_size(4)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
}
376 
// Single tester run: block_height=7, block_width=46, input_stride=51,
// output_stride=13, input_element_stride=21, output_element_stride=17.
TEST(X32_TRANSPOSEC__1X2_SCALAR_INT_4, bh_7_bw_46_ies_21_oes_17) {
  TransposeMicrokernelTester()
    .input_stride(51)
    .output_stride(13)
    .block_width(46)
    .block_height(7)
    .element_size(4)
    .input_element_stride(21)
    .output_element_stride(17)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x2_scalar_int);
}
389 
// Single tester run: block_height=1, block_width=4, input_stride=8, output_stride=2.
TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_1_bw_4) {
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(2)
    .block_width(4)
    .block_height(1)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
}
400 
// Sweeps block_height over [1, 2] and block_width over [1, 8];
// input_stride is 3 * width and output_stride is 7 * height.
TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_1_2_bw_1_8) {
  for (size_t bh = 1; bh <= 2; ++bh) {
    for (size_t bw = 1; bw <= 8; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw * 3)
        .output_stride(bh * 7)
        .block_width(bw)
        .block_height(bh)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
    }
  }
}
415 
// Single tester run: block_height=1, block_width=8, input_stride=8, output_stride=1.
TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_1_bw_8) {
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(1)
    .block_width(8)
    .block_height(1)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
}
426 
// Sweeps block_width over [5, 8) (5, 6, 7); input_stride equals the width,
// block_height is 1 and output_stride is 2.
TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_1_bw_5_8) {
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(2)
      .block_width(bw)
      .block_height(1)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
  }
}
439 
// Sweeps block_width over [5, 8) (5, 6, 7); input_stride equals the width,
// block_height is 2 and output_stride is 2.
TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_2_bw_5_8) {
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(2)
      .block_width(bw)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
  }
}
452 
// Single tester run: block_height=2, block_width=4, input_stride=4, output_stride=7.
TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_2_bw_4) {
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(7)
    .block_width(4)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
}
463 
// Intended to vary block_height (also used as output_stride) with
// block_width=7 and input_stride=21.
// NOTE(review): the range [2, 2) is empty, so the tester never runs and this
// test passes vacuously; confirm against tools/generate-transpose-test.py
// before changing the bounds.
// NOTE(review): the test name says bw_4 but block_width is 7 - confirm.
TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_2_2_bw_4) {
  for (size_t bh = 2; bh < 2; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(21)
      .output_stride(bh)
      .block_width(7)
      .block_height(bh)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
  }
}
476 
// Intended to vary block_height (also used as output_stride) with
// block_width=8 and input_stride=8.
// NOTE(review): the range [2, 2) is empty, so the tester never runs and this
// test passes vacuously; confirm against tools/generate-transpose-test.py
// before changing the bounds.
TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_2_2_bw_8) {
  for (size_t bh = 2; bh < 2; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(bh)
      .block_width(8)
      .block_height(bh)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
  }
}
489 
// Intended to vary block_height (outer) and block_width (inner, [5, 8));
// strides equal the corresponding block dimension.
// NOTE(review): the outer range [2, 2) is empty, so the tester never runs and
// this test passes vacuously; confirm against
// tools/generate-transpose-test.py before changing the bounds.
TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_2_2_bw_5_8) {
  for (size_t bh = 2; bh < 2; ++bh) {
    for (size_t bw = 5; bw < 8; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
    }
  }
}
504 
// Single tester run with input_stride (8) larger than block_width (4);
// block_height=1, output_stride=1.
TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_1_bw_4_is_8) {
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(1)
    .block_width(4)
    .block_height(1)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
}
515 
// Single tester run with output_stride (2) larger than block_height (1);
// block_width=4, input_stride=4.
TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_1_bw_4_os_2) {
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(2)
    .block_width(4)
    .block_height(1)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
}
526 
// Single tester run with both strides larger than the block:
// block_height=1, block_width=4, input_stride=8, output_stride=2.
TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_1_bw_4_is_8_os_2) {
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(2)
    .block_width(4)
    .block_height(1)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
}
537 
// Single tester run: block_height=17, block_width=76 with
// input_element_stride=15; strides equal the block dimensions.
TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_17_bw_76_ies_15) {
  TransposeMicrokernelTester()
    .input_stride(76)
    .output_stride(17)
    .block_width(76)
    .block_height(17)
    .element_size(4)
    .input_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
}
549 
// Single tester run: block_height=3, block_width=20 with
// output_element_stride=15; strides equal the block dimensions.
TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_3_bw_20_oes_15) {
  TransposeMicrokernelTester()
    .input_stride(20)
    .output_stride(3)
    .block_width(20)
    .block_height(3)
    .element_size(4)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
}
561 
// Single tester run: block_height=7, block_width=92, input_stride=97,
// output_stride=13, input_element_stride=21, output_element_stride=17.
TEST(X32_TRANSPOSEC__1X4_SCALAR_FLOAT_4, bh_7_bw_92_ies_21_oes_17) {
  TransposeMicrokernelTester()
    .input_stride(97)
    .output_stride(13)
    .block_width(92)
    .block_height(7)
    .element_size(4)
    .input_element_stride(21)
    .output_element_stride(17)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x4_scalar_float);
}
574 
// Single tester run: block_height=1, block_width=4, input_stride=8, output_stride=2.
TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_1_bw_4) {
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(2)
    .block_width(4)
    .block_height(1)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
}
585 
// Sweeps block_height over [1, 2] and block_width over [1, 8];
// input_stride is 3 * width and output_stride is 7 * height.
TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_1_2_bw_1_8) {
  for (size_t bh = 1; bh <= 2; ++bh) {
    for (size_t bw = 1; bw <= 8; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw * 3)
        .output_stride(bh * 7)
        .block_width(bw)
        .block_height(bh)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
    }
  }
}
600 
// Single tester run: block_height=1, block_width=8, input_stride=8, output_stride=1.
TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_1_bw_8) {
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(1)
    .block_width(8)
    .block_height(1)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
}
611 
// Sweeps block_width over [5, 8) (5, 6, 7); input_stride equals the width,
// block_height is 1 and output_stride is 2.
TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_1_bw_5_8) {
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(2)
      .block_width(bw)
      .block_height(1)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
  }
}
624 
// Sweeps block_width over [5, 8) (5, 6, 7); input_stride equals the width,
// block_height is 2 and output_stride is 2.
TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_2_bw_5_8) {
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(2)
      .block_width(bw)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
  }
}
637 
// Single tester run: block_height=2, block_width=4, input_stride=4, output_stride=7.
TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_2_bw_4) {
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(7)
    .block_width(4)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
}
648 
// Intended to vary block_height (also used as output_stride) with
// block_width=7 and input_stride=21.
// NOTE(review): the range [2, 2) is empty, so the tester never runs and this
// test passes vacuously; confirm against tools/generate-transpose-test.py
// before changing the bounds.
// NOTE(review): the test name says bw_4 but block_width is 7 - confirm.
TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_2_2_bw_4) {
  for (size_t bh = 2; bh < 2; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(21)
      .output_stride(bh)
      .block_width(7)
      .block_height(bh)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
  }
}
661 
// Intended to vary block_height (also used as output_stride) with
// block_width=8 and input_stride=8.
// NOTE(review): the range [2, 2) is empty, so the tester never runs and this
// test passes vacuously; confirm against tools/generate-transpose-test.py
// before changing the bounds.
TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_2_2_bw_8) {
  for (size_t bh = 2; bh < 2; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(bh)
      .block_width(8)
      .block_height(bh)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
  }
}
674 
// Intended to vary block_height (outer) and block_width (inner, [5, 8));
// strides equal the corresponding block dimension.
// NOTE(review): the outer range [2, 2) is empty, so the tester never runs and
// this test passes vacuously; confirm against
// tools/generate-transpose-test.py before changing the bounds.
TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_2_2_bw_5_8) {
  for (size_t bh = 2; bh < 2; ++bh) {
    for (size_t bw = 5; bw < 8; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
    }
  }
}
689 
// Single tester run with input_stride (8) larger than block_width (4);
// block_height=1, output_stride=1.
TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_1_bw_4_is_8) {
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(1)
    .block_width(4)
    .block_height(1)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
}
700 
// Single tester run with output_stride (2) larger than block_height (1);
// block_width=4, input_stride=4.
TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_1_bw_4_os_2) {
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(2)
    .block_width(4)
    .block_height(1)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
}
711 
// Single tester run with both strides larger than the block:
// block_height=1, block_width=4, input_stride=8, output_stride=2.
TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_1_bw_4_is_8_os_2) {
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(2)
    .block_width(4)
    .block_height(1)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
}
722 
// Single tester run: block_height=17, block_width=76 with
// input_element_stride=15; strides equal the block dimensions.
TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_17_bw_76_ies_15) {
  TransposeMicrokernelTester()
    .input_stride(76)
    .output_stride(17)
    .block_width(76)
    .block_height(17)
    .element_size(4)
    .input_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
}
734 
// Single tester run: block_height=3, block_width=20 with
// output_element_stride=15; strides equal the block dimensions.
TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_3_bw_20_oes_15) {
  TransposeMicrokernelTester()
    .input_stride(20)
    .output_stride(3)
    .block_width(20)
    .block_height(3)
    .element_size(4)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
}
746 
// Single tester run: block_height=7, block_width=92, input_stride=97,
// output_stride=13, input_element_stride=21, output_element_stride=17.
TEST(X32_TRANSPOSEC__1X4_SCALAR_INT_4, bh_7_bw_92_ies_21_oes_17) {
  TransposeMicrokernelTester()
    .input_stride(97)
    .output_stride(13)
    .block_width(92)
    .block_height(7)
    .element_size(4)
    .input_element_stride(21)
    .output_element_stride(17)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__1x4_scalar_int);
}
759 
// Single tester run: block_height=2, block_width=1, input_stride=2, output_stride=4.
TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_2_bw_1) {
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(4)
    .block_width(1)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
}
770 
// Sweeps block_height over [1, 4] and block_width over [1, 2];
// input_stride is 3 * width and output_stride is 7 * height.
TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_1_4_bw_1_2) {
  for (size_t bh = 1; bh <= 4; ++bh) {
    for (size_t bw = 1; bw <= 2; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw * 3)
        .output_stride(bh * 7)
        .block_width(bw)
        .block_height(bh)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
    }
  }
}
785 
// Single tester run: block_height=2, block_width=2, input_stride=2, output_stride=2.
TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_2_bw_2) {
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(2)
    .block_width(2)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
}
796 
// Intended to vary block_width (also used as input_stride) with
// block_height=2 and output_stride=4.
// NOTE(review): the range [2, 2) is empty, so the tester never runs and this
// test passes vacuously; confirm against tools/generate-transpose-test.py
// before changing the bounds.
TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_2_bw_2_2) {
  for (size_t bw = 2; bw < 2; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(4)
      .block_width(bw)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
  }
}
809 
// Intended to vary block_width (also used as input_stride) with
// block_height=4 and output_stride=4.
// NOTE(review): the range [2, 2) is empty, so the tester never runs and this
// test passes vacuously; confirm against tools/generate-transpose-test.py
// before changing the bounds.
TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_4_bw_2_2) {
  for (size_t bw = 2; bw < 2; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(4)
      .block_width(bw)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
  }
}
822 
// Single tester run: block_height=4, block_width=1, input_stride=1, output_stride=10.
TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_4_bw_1) {
  TransposeMicrokernelTester()
    .input_stride(1)
    .output_stride(10)
    .block_width(1)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
}
833 
// Loop covers block_height in [3, 4), i.e. only 3; output_stride equals the
// height, block_width is 4 and input_stride is 18.
// NOTE(review): the test name says bw_1 but block_width is 4 - confirm.
TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_3_4_bw_1) {
  for (size_t bh = 3; bh < 4; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(18)
      .output_stride(bh)
      .block_width(4)
      .block_height(bh)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
  }
}
846 
// Loop covers block_height in [3, 4), i.e. only 3; output_stride equals the
// height, block_width is 2 and input_stride is 2.
TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_3_4_bw_2) {
  for (size_t bh = 3; bh < 4; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(bh)
      .block_width(2)
      .block_height(bh)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
  }
}
859 
// Intended to vary block_height (outer, [3, 4)) and block_width (inner);
// strides equal the corresponding block dimension.
// NOTE(review): the inner range [2, 2) is empty, so the tester never runs and
// this test passes vacuously; confirm against
// tools/generate-transpose-test.py before changing the bounds.
TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_3_4_bw_2_2) {
  for (size_t bh = 3; bh < 4; ++bh) {
    for (size_t bw = 2; bw < 2; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
    }
  }
}
874 
// Single tester run with input_stride (2) larger than block_width (1);
// block_height=2, output_stride=2.
TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_2_bw_1_is_2) {
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(2)
    .block_width(1)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
}
885 
// Single tester run with output_stride (4) larger than block_height (2);
// block_width=1, input_stride=1.
TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_2_bw_1_os_4) {
  TransposeMicrokernelTester()
    .input_stride(1)
    .output_stride(4)
    .block_width(1)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
}
896 
// Single tester run with both strides larger than the block:
// block_height=2, block_width=1, input_stride=2, output_stride=4.
TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_2_bw_1_is_2_os_4) {
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(4)
    .block_width(1)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
}
907 
// Single tester run: block_height=34, block_width=19 with
// input_element_stride=15; strides equal the block dimensions.
TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_34_bw_19_ies_15) {
  TransposeMicrokernelTester()
    .input_stride(19)
    .output_stride(34)
    .block_width(19)
    .block_height(34)
    .element_size(4)
    .input_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
}
919 
// Single tester run: block_height=6, block_width=5 with
// output_element_stride=15; strides equal the block dimensions.
TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_6_bw_5_oes_15) {
  TransposeMicrokernelTester()
    .input_stride(5)
    .output_stride(6)
    .block_width(5)
    .block_height(6)
    .element_size(4)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
}
931 
// Single tester run: block_height=14, block_width=23, input_stride=28,
// output_stride=20, input_element_stride=21, output_element_stride=17.
TEST(X32_TRANSPOSEC__2X1_SCALAR_FLOAT_4, bh_14_bw_23_ies_21_oes_17) {
  TransposeMicrokernelTester()
    .input_stride(28)
    .output_stride(20)
    .block_width(23)
    .block_height(14)
    .element_size(4)
    .input_element_stride(21)
    .output_element_stride(17)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x1_scalar_float);
}
944 
// Single tester run: block_height=2, block_width=1, input_stride=2, output_stride=4.
TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_2_bw_1) {
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(4)
    .block_width(1)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x1_scalar_int);
}
955 
// Sweeps block_height over [1, 4] and block_width over [1, 2];
// input_stride is 3 * width and output_stride is 7 * height.
TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_1_4_bw_1_2) {
  for (size_t bh = 1; bh <= 4; ++bh) {
    for (size_t bw = 1; bw <= 2; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw * 3)
        .output_stride(bh * 7)
        .block_width(bw)
        .block_height(bh)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x1_scalar_int);
    }
  }
}
970 
// Single configuration: block_width 2, block_height 2, input_stride 2,
// output_stride 2.
TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_2_bw_2) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x1_scalar_int;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(2)
      .output_stride(2)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(ukernel);
}
981 
// Varies block_width over the half-open range [2, 2) with block_height 2 and
// output_stride 4; input_stride tracks the width.
// NOTE: [2, 2) is empty, so the loop body never executes and this test makes
// no kernel call.
TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_2_bw_2_2) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x1_scalar_int;
  for (size_t width = 2; width < 2; ++width) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(2)
        .element_size(4)
        .iterations(1)
        .Test(ukernel);
  }
}
994 
// Varies block_width over the half-open range [2, 2) with block_height 4 and
// output_stride 4; input_stride tracks the width.
// NOTE: [2, 2) is empty, so the loop body never executes and this test makes
// no kernel call.
TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_4_bw_2_2) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x1_scalar_int;
  for (size_t width = 2; width < 2; ++width) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(4)
        .element_size(4)
        .iterations(1)
        .Test(ukernel);
  }
}
1007 
// Single configuration: block_width 1, block_height 4, input_stride 1,
// output_stride 10.
TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_4_bw_1) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x1_scalar_int;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(1)
      .output_stride(10)
      .block_width(1)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(ukernel);
}
1018 
// Varies block_height over [3, 4) (only height 3 is visited); output_stride
// equals the height.
// NOTE(review): the test name says bw_1, but the body uses block_width 4 with
// input_stride 18 -- presumably what the generator template intends; confirm.
TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_3_4_bw_1) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x1_scalar_int;
  for (size_t height = 3; height < 4; ++height) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(18)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(ukernel);
  }
}
1031 
// Varies block_height over [3, 4) (only height 3 is visited) with
// block_width 2 and input_stride 2; output_stride equals the height.
TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_3_4_bw_2) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x1_scalar_int;
  for (size_t height = 3; height < 4; ++height) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(2)
        .output_stride(height)
        .block_width(2)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(ukernel);
  }
}
1044 
// Varies block_height over [3, 4) and block_width over [2, 2); the strides
// track the corresponding block extents.
// NOTE: the width range [2, 2) is empty, so the inner loop body never executes
// and this test makes no kernel call.
TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_3_4_bw_2_2) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x1_scalar_int;
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 2; width < 2; ++width) {
      auto tester = TransposeMicrokernelTester();
      tester.input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(ukernel);
    }
  }
}
1059 
// Single configuration: block_width 1, block_height 2, with input_stride 2
// (wider than the block) and output_stride 2.
TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_2_bw_1_is_2) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x1_scalar_int;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(2)
      .output_stride(2)
      .block_width(1)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(ukernel);
}
1070 
// Single configuration: block_width 1, block_height 2, with input_stride 1
// and output_stride 4 (larger than the block height).
TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_2_bw_1_os_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x1_scalar_int;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(1)
      .output_stride(4)
      .block_width(1)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(ukernel);
}
1081 
// Single configuration: block_width 1, block_height 2, with input_stride 2
// and output_stride 4 (both larger than the block extents).
TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_2_bw_1_is_2_os_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x1_scalar_int;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(2)
      .output_stride(4)
      .block_width(1)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(ukernel);
}
1092 
// Single configuration: block_width 19, block_height 34, input_stride 19,
// output_stride 34, with input_element_stride set to 15.
TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_34_bw_19_ies_15) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x1_scalar_int;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(19)
      .output_stride(34)
      .block_width(19)
      .block_height(34)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(ukernel);
}
1104 
// Single configuration: block_width 5, block_height 6, input_stride 5,
// output_stride 6, with output_element_stride set to 15.
TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_6_bw_5_oes_15) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x1_scalar_int;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(5)
      .output_stride(6)
      .block_width(5)
      .block_height(6)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(ukernel);
}
1116 
// Single configuration: block_width 23, block_height 14, with input_stride 28
// and output_stride 20 (both larger than the block extents), plus
// input_element_stride 21 and output_element_stride 17.
TEST(X32_TRANSPOSEC__2X1_SCALAR_INT_4, bh_14_bw_23_ies_21_oes_17) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x1_scalar_int;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(28)
      .output_stride(20)
      .block_width(23)
      .block_height(14)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(ukernel);
}
1129 
// Single configuration: block_width 2, block_height 2, input_stride 4,
// output_stride 4.
TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_2_bw_2) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x2_scalar_float;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(ukernel);
}
1140 
// Sweeps block_height 1..4 and block_width 1..4 (both inclusive).
// input_stride is 3x the width and output_stride is 7x the height.
TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_1_4_bw_1_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x2_scalar_float;
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      const size_t in_stride = width * 3;
      const size_t out_stride = height * 7;
      auto tester = TransposeMicrokernelTester();
      tester.input_stride(in_stride)
          .output_stride(out_stride)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(ukernel);
    }
  }
}
1155 
// Single configuration: block_width 4, block_height 2, input_stride 4,
// output_stride 2.
TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_2_bw_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x2_scalar_float;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(4)
      .output_stride(2)
      .block_width(4)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(ukernel);
}
1166 
// Varies block_width over [3, 4) (only width 3 is visited) with block_height 2
// and output_stride 4; input_stride tracks the width.
TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_2_bw_3_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x2_scalar_float;
  for (size_t width = 3; width < 4; ++width) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(2)
        .element_size(4)
        .iterations(1)
        .Test(ukernel);
  }
}
1179 
// Varies block_width over [3, 4) (only width 3 is visited) with block_height 4
// and output_stride 4; input_stride tracks the width.
TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_4_bw_3_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x2_scalar_float;
  for (size_t width = 3; width < 4; ++width) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(4)
        .element_size(4)
        .iterations(1)
        .Test(ukernel);
  }
}
1192 
// Single configuration: block_width 2, block_height 4, input_stride 2,
// output_stride 10.
TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_4_bw_2) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x2_scalar_float;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(2)
      .output_stride(10)
      .block_width(2)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(ukernel);
}
1203 
// Varies block_height over [3, 4) (only height 3 is visited); output_stride
// equals the height.
// NOTE(review): the test name says bw_2, but the body uses block_width 5 with
// input_stride 19 -- presumably what the generator template intends; confirm.
TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_3_4_bw_2) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x2_scalar_float;
  for (size_t height = 3; height < 4; ++height) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(19)
        .output_stride(height)
        .block_width(5)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(ukernel);
  }
}
1216 
// Varies block_height over [3, 4) (only height 3 is visited) with
// block_width 4 and input_stride 4; output_stride equals the height.
TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_3_4_bw_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x2_scalar_float;
  for (size_t height = 3; height < 4; ++height) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(4)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(ukernel);
  }
}
1229 
// Varies block_height over [3, 4) and block_width over [3, 4), so only the
// 3x3 case is visited; each stride equals the matching block extent.
TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_3_4_bw_3_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x2_scalar_float;
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      auto tester = TransposeMicrokernelTester();
      tester.input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(ukernel);
    }
  }
}
1244 
// Single configuration: block_width 2, block_height 2, with input_stride 4
// (wider than the block) and output_stride 2.
TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_2_bw_2_is_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x2_scalar_float;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(4)
      .output_stride(2)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(ukernel);
}
1255 
// Single configuration: block_width 2, block_height 2, with input_stride 2
// and output_stride 4 (larger than the block height).
TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_2_bw_2_os_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x2_scalar_float;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(2)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(ukernel);
}
1266 
// Single configuration: block_width 2, block_height 2, with input_stride 4
// and output_stride 4 (both larger than the block extents).
TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_2_bw_2_is_4_os_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x2_scalar_float;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(ukernel);
}
1277 
// Single configuration: block_width 38, block_height 34, input_stride 38,
// output_stride 34, with input_element_stride set to 15.
TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_34_bw_38_ies_15) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x2_scalar_float;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(38)
      .output_stride(34)
      .block_width(38)
      .block_height(34)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(ukernel);
}
1289 
// Single configuration: block_width 10, block_height 6, input_stride 10,
// output_stride 6, with output_element_stride set to 15.
TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_6_bw_10_oes_15) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x2_scalar_float;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(10)
      .output_stride(6)
      .block_width(10)
      .block_height(6)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(ukernel);
}
1301 
// Single configuration: block_width 46, block_height 14, with input_stride 51
// and output_stride 20 (both larger than the block extents), plus
// input_element_stride 21 and output_element_stride 17.
TEST(X32_TRANSPOSEC__2X2_SCALAR_FLOAT_4, bh_14_bw_46_ies_21_oes_17) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x2_scalar_float;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(51)
      .output_stride(20)
      .block_width(46)
      .block_height(14)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(ukernel);
}
1314 
// Single configuration: block_width 2, block_height 2, input_stride 4,
// output_stride 4.
TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_2_bw_2) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x2_scalar_int;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(ukernel);
}
1325 
// Sweeps block_height 1..4 and block_width 1..4 (both inclusive).
// input_stride is 3x the width and output_stride is 7x the height.
TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_1_4_bw_1_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x2_scalar_int;
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      const size_t in_stride = width * 3;
      const size_t out_stride = height * 7;
      auto tester = TransposeMicrokernelTester();
      tester.input_stride(in_stride)
          .output_stride(out_stride)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(ukernel);
    }
  }
}
1340 
// Single configuration: block_width 4, block_height 2, input_stride 4,
// output_stride 2.
TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_2_bw_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x2_scalar_int;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(4)
      .output_stride(2)
      .block_width(4)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(ukernel);
}
1351 
// Varies block_width over [3, 4) (only width 3 is visited) with block_height 2
// and output_stride 4; input_stride tracks the width.
TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_2_bw_3_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x2_scalar_int;
  for (size_t width = 3; width < 4; ++width) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(2)
        .element_size(4)
        .iterations(1)
        .Test(ukernel);
  }
}
1364 
// Varies block_width over [3, 4) (only width 3 is visited) with block_height 4
// and output_stride 4; input_stride tracks the width.
TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_4_bw_3_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x2_scalar_int;
  for (size_t width = 3; width < 4; ++width) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(4)
        .element_size(4)
        .iterations(1)
        .Test(ukernel);
  }
}
1377 
// Single configuration: block_width 2, block_height 4, input_stride 2,
// output_stride 10.
TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_4_bw_2) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x2_scalar_int;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(2)
      .output_stride(10)
      .block_width(2)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(ukernel);
}
1388 
// Varies block_height over [3, 4) (only height 3 is visited); output_stride
// equals the height.
// NOTE(review): the test name says bw_2, but the body uses block_width 5 with
// input_stride 19 -- presumably what the generator template intends; confirm.
TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_3_4_bw_2) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x2_scalar_int;
  for (size_t height = 3; height < 4; ++height) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(19)
        .output_stride(height)
        .block_width(5)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(ukernel);
  }
}
1401 
// Varies block_height over [3, 4) (only height 3 is visited) with
// block_width 4 and input_stride 4; output_stride equals the height.
TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_3_4_bw_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x2_scalar_int;
  for (size_t height = 3; height < 4; ++height) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(4)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(ukernel);
  }
}
1414 
// Varies block_height over [3, 4) and block_width over [3, 4), so only the
// 3x3 case is visited; each stride equals the matching block extent.
TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_3_4_bw_3_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x2_scalar_int;
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      auto tester = TransposeMicrokernelTester();
      tester.input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(ukernel);
    }
  }
}
1429 
// Single configuration: block_width 2, block_height 2, with input_stride 4
// (wider than the block) and output_stride 2.
TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_2_bw_2_is_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x2_scalar_int;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(4)
      .output_stride(2)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(ukernel);
}
1440 
// Single configuration: block_width 2, block_height 2, with input_stride 2
// and output_stride 4 (larger than the block height).
TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_2_bw_2_os_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x2_scalar_int;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(2)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(ukernel);
}
1451 
// Single configuration: block_width 2, block_height 2, with input_stride 4
// and output_stride 4 (both larger than the block extents).
TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_2_bw_2_is_4_os_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x2_scalar_int;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(ukernel);
}
1462 
// Single configuration: block_width 38, block_height 34, input_stride 38,
// output_stride 34, with input_element_stride set to 15.
TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_34_bw_38_ies_15) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x2_scalar_int;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(38)
      .output_stride(34)
      .block_width(38)
      .block_height(34)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(ukernel);
}
1474 
// Single configuration: block_width 10, block_height 6, input_stride 10,
// output_stride 6, with output_element_stride set to 15.
TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_6_bw_10_oes_15) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x2_scalar_int;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(10)
      .output_stride(6)
      .block_width(10)
      .block_height(6)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(ukernel);
}
1486 
// Single configuration: block_width 46, block_height 14, with input_stride 51
// and output_stride 20 (both larger than the block extents), plus
// input_element_stride 21 and output_element_stride 17.
TEST(X32_TRANSPOSEC__2X2_SCALAR_INT_4, bh_14_bw_46_ies_21_oes_17) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x2_scalar_int;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(51)
      .output_stride(20)
      .block_width(46)
      .block_height(14)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(ukernel);
}
1499 
// Single configuration: block_width 4, block_height 2, input_stride 8,
// output_stride 4.
TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_2_bw_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x4_scalar_float;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(8)
      .output_stride(4)
      .block_width(4)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(ukernel);
}
1510 
// Sweeps block_height 1..4 and block_width 1..8 (both inclusive).
// input_stride is 3x the width and output_stride is 7x the height.
TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_1_4_bw_1_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x4_scalar_float;
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      const size_t in_stride = width * 3;
      const size_t out_stride = height * 7;
      auto tester = TransposeMicrokernelTester();
      tester.input_stride(in_stride)
          .output_stride(out_stride)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(ukernel);
    }
  }
}
1525 
// Single configuration: block_width 8, block_height 2, input_stride 8,
// output_stride 2.
TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_2_bw_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x4_scalar_float;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(8)
      .output_stride(2)
      .block_width(8)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(ukernel);
}
1536 
// Varies block_width over [5, 8) (widths 5, 6, 7) with block_height 2 and
// output_stride 4; input_stride tracks the width.
TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_2_bw_5_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x4_scalar_float;
  for (size_t width = 5; width < 8; ++width) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(2)
        .element_size(4)
        .iterations(1)
        .Test(ukernel);
  }
}
1549 
// Varies block_width over [5, 8) (widths 5, 6, 7) with block_height 4 and
// output_stride 4; input_stride tracks the width.
TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_4_bw_5_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x4_scalar_float;
  for (size_t width = 5; width < 8; ++width) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(4)
        .element_size(4)
        .iterations(1)
        .Test(ukernel);
  }
}
1562 
// Single configuration: block_width 4, block_height 4, input_stride 4,
// output_stride 10.
TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_4_bw_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x4_scalar_float;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(4)
      .output_stride(10)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(ukernel);
}
1573 
// Varies block_height over [3, 4) (only height 3 is visited); output_stride
// equals the height.
// NOTE(review): the test name says bw_4, but the body uses block_width 7 with
// input_stride 21 -- presumably what the generator template intends; confirm.
TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_3_4_bw_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x4_scalar_float;
  for (size_t height = 3; height < 4; ++height) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(21)
        .output_stride(height)
        .block_width(7)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(ukernel);
  }
}
1586 
// Varies block_height over [3, 4) (only height 3 is visited) with
// block_width 8 and input_stride 8; output_stride equals the height.
TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_3_4_bw_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x4_scalar_float;
  for (size_t height = 3; height < 4; ++height) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(8)
        .output_stride(height)
        .block_width(8)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(ukernel);
  }
}
1599 
// Varies block_height over [3, 4) (only height 3) and block_width over [5, 8)
// (widths 5, 6, 7); each stride equals the matching block extent.
TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_3_4_bw_5_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x4_scalar_float;
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      auto tester = TransposeMicrokernelTester();
      tester.input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(ukernel);
    }
  }
}
1614 
// Single configuration: block_width 4, block_height 2, with input_stride 8
// (wider than the block) and output_stride 2.
TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_2_bw_4_is_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x4_scalar_float;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(8)
      .output_stride(2)
      .block_width(4)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(ukernel);
}
1625 
// Single configuration: block_width 4, block_height 2, with input_stride 4
// and output_stride 4 (larger than the block height).
TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_2_bw_4_os_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x4_scalar_float;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(4)
      .output_stride(4)
      .block_width(4)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(ukernel);
}
1636 
// Single configuration: block_width 4, block_height 2, with input_stride 8
// and output_stride 4 (both larger than the block extents).
TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_2_bw_4_is_8_os_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x4_scalar_float;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(8)
      .output_stride(4)
      .block_width(4)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(ukernel);
}
1647 
// Single configuration: block_width 76, block_height 34, input_stride 76,
// output_stride 34, with input_element_stride set to 15.
TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_34_bw_76_ies_15) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x4_scalar_float;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(76)
      .output_stride(34)
      .block_width(76)
      .block_height(34)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(ukernel);
}
1659 
// Single configuration: block_width 20, block_height 6, input_stride 20,
// output_stride 6, with output_element_stride set to 15.
TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_6_bw_20_oes_15) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x4_scalar_float;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(20)
      .output_stride(6)
      .block_width(20)
      .block_height(6)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(ukernel);
}
1671 
// Single configuration: block_width 92, block_height 14, with input_stride 97
// and output_stride 20 (both larger than the block extents), plus
// input_element_stride 21 and output_element_stride 17.
TEST(X32_TRANSPOSEC__2X4_SCALAR_FLOAT_4, bh_14_bw_92_ies_21_oes_17) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x4_scalar_float;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(97)
      .output_stride(20)
      .block_width(92)
      .block_height(14)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(ukernel);
}
1684 
// Single configuration: block_width 4, block_height 2, input_stride 8,
// output_stride 4.
TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_2_bw_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x4_scalar_int;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(8)
      .output_stride(4)
      .block_width(4)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(ukernel);
}
1695 
// Sweeps block_height 1..4 and block_width 1..8 (both inclusive).
// input_stride is 3x the width and output_stride is 7x the height.
TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_1_4_bw_1_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x4_scalar_int;
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      const size_t in_stride = width * 3;
      const size_t out_stride = height * 7;
      auto tester = TransposeMicrokernelTester();
      tester.input_stride(in_stride)
          .output_stride(out_stride)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(ukernel);
    }
  }
}
1710 
// Single configuration: block_width 8, block_height 2, input_stride 8,
// output_stride 2.
TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_2_bw_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x4_scalar_int;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(8)
      .output_stride(2)
      .block_width(8)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(ukernel);
}
1721 
// Varies block_width over [5, 8) (widths 5, 6, 7) with block_height 2 and
// output_stride 4; input_stride tracks the width.
TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_2_bw_5_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x4_scalar_int;
  for (size_t width = 5; width < 8; ++width) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(2)
        .element_size(4)
        .iterations(1)
        .Test(ukernel);
  }
}
1734 
// Varies block_width over [5, 8) (widths 5, 6, 7) with block_height 4 and
// output_stride 4; input_stride tracks the width.
TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_4_bw_5_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x4_scalar_int;
  for (size_t width = 5; width < 8; ++width) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(4)
        .element_size(4)
        .iterations(1)
        .Test(ukernel);
  }
}
1747 
// Single configuration: block_width 4, block_height 4, input_stride 4,
// output_stride 10.
TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_4_bw_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x4_scalar_int;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(4)
      .output_stride(10)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(ukernel);
}
1758 
// Varies block_height over [3, 4) (only height 3 is visited); output_stride
// equals the height.
// NOTE(review): the test name says bw_4, but the body uses block_width 7 with
// input_stride 21 -- presumably what the generator template intends; confirm.
TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_3_4_bw_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x4_scalar_int;
  for (size_t height = 3; height < 4; ++height) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(21)
        .output_stride(height)
        .block_width(7)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(ukernel);
  }
}
1771 
// Varies block_height over [3, 4) (only height 3 is visited) with
// block_width 8 and input_stride 8; output_stride equals the height.
TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_3_4_bw_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x4_scalar_int;
  for (size_t height = 3; height < 4; ++height) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(8)
        .output_stride(height)
        .block_width(8)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(ukernel);
  }
}
1784 
// Varies block_height over [3, 4) (only height 3) and block_width over [5, 8)
// (widths 5, 6, 7); each stride equals the matching block extent.
TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_3_4_bw_5_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x4_scalar_int;
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      auto tester = TransposeMicrokernelTester();
      tester.input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(ukernel);
    }
  }
}
1799 
// Single configuration: block_width 4, block_height 2, with input_stride 8
// (wider than the block) and output_stride 2.
TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_2_bw_4_is_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x4_scalar_int;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(8)
      .output_stride(2)
      .block_width(4)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(ukernel);
}
1810 
// Single configuration: block_width 4, block_height 2, with input_stride 4
// and output_stride 4 (larger than the block height).
TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_2_bw_4_os_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x4_scalar_int;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(4)
      .output_stride(4)
      .block_width(4)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(ukernel);
}
1821 
// Single configuration: block_width 4, block_height 2, with input_stride 8
// and output_stride 4 (both larger than the block extents).
TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_2_bw_4_is_8_os_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x4_scalar_int;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(8)
      .output_stride(4)
      .block_width(4)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(ukernel);
}
1832 
// Single configuration: block_width 76, block_height 34, input_stride 76,
// output_stride 34, with input_element_stride set to 15.
TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_34_bw_76_ies_15) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x4_scalar_int;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(76)
      .output_stride(34)
      .block_width(76)
      .block_height(34)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(ukernel);
}
1844 
// Single configuration: block_width 20, block_height 6, input_stride 20,
// output_stride 6, with output_element_stride set to 15.
TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_6_bw_20_oes_15) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x4_scalar_int;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(20)
      .output_stride(6)
      .block_width(20)
      .block_height(6)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(ukernel);
}
1856 
// Single configuration: block_width 92, block_height 14, with input_stride 97
// and output_stride 20 (both larger than the block extents), plus
// input_element_stride 21 and output_element_stride 17.
TEST(X32_TRANSPOSEC__2X4_SCALAR_INT_4, bh_14_bw_92_ies_21_oes_17) {
  const auto ukernel = xnn_x32_transposec_ukernel__2x4_scalar_int;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(97)
      .output_stride(20)
      .block_width(92)
      .block_height(14)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(ukernel);
}
1869 
// Single configuration: block_width 1, block_height 4, input_stride 2,
// output_stride 8.
TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_4_bw_1) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x1_scalar_float;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(2)
      .output_stride(8)
      .block_width(1)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(ukernel);
}
1880 
// Sweeps block_height 1..8 and block_width 1..2 (both inclusive).
// input_stride is 3x the width and output_stride is 7x the height.
TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_1_8_bw_1_2) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x1_scalar_float;
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 2; ++width) {
      const size_t in_stride = width * 3;
      const size_t out_stride = height * 7;
      auto tester = TransposeMicrokernelTester();
      tester.input_stride(in_stride)
          .output_stride(out_stride)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(ukernel);
    }
  }
}
1895 
// Single configuration: block_width 2, block_height 4, input_stride 2, output_stride 4.
TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_4_bw_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(4)
      .block_width(2)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
}
1906 
// Width sweep at block_height 4.
// NOTE(review): the width range is [2, 2), so the loop body never runs and
// this test exercises nothing as written.
TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_4_bw_2_2) {
  for (size_t width = 2; width < 2; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(4)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
  }
}
1919 
// Width sweep at block_height 8.
// NOTE(review): the width range is [2, 2), so the loop body never runs and
// this test exercises nothing as written.
TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_8_bw_2_2) {
  for (size_t width = 2; width < 2; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(8)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
  }
}
1932 
// Single configuration: block_width 1, block_height 8, input_stride 1, output_stride 16.
TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_8_bw_1) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(1)
      .output_stride(16)
      .block_width(1)
      .block_height(8)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
}
1943 
// Sweeps block_height over [5, 8) with output_stride equal to the height.
// NOTE(review): the test name says bw_1, but block_width is 4 (input_stride 18).
TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_5_8_bw_1) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(18)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
  }
}
1956 
// Sweeps block_height over [5, 8) at block_width 2; output_stride equals the height.
TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_5_8_bw_2) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(2)
        .output_stride(height)
        .block_width(2)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
  }
}
1969 
// Combined sweep: block_height over [5, 8), block_width over [2, 2).
// NOTE(review): the inner width range is empty, so the tester is never
// invoked and this test exercises nothing as written.
TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_5_8_bw_2_2) {
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 2; width < 2; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
    }
  }
}
1984 
// Single configuration with input_stride (2) larger than block_width (1); output_stride 4.
TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_4_bw_1_is_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(4)
      .block_width(1)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
}
1995 
// Single configuration with output_stride (8) larger than block_height (4); input_stride 1.
TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_4_bw_1_os_8) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(1)
      .output_stride(8)
      .block_width(1)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
}
2006 
// Single configuration with both strides enlarged: input_stride 2, output_stride 8
// for block_width 1, block_height 4.
TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_4_bw_1_is_2_os_8) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(8)
      .block_width(1)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
}
2017 
// Single configuration that also sets input_element_stride:
// block_width 19, block_height 68, input_element_stride 15.
TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_68_bw_19_ies_15) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(19)
      .output_stride(68)
      .block_width(19)
      .block_height(68)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
}
2029 
// Single configuration that also sets output_element_stride:
// block_width 5, block_height 12, output_element_stride 15.
TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_12_bw_5_oes_15) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(5)
      .output_stride(12)
      .block_width(5)
      .block_height(12)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
}
2041 
// Single configuration setting both element strides (input 21, output 17);
// input_stride (28) and output_stride (34) exceed block_width (23) / block_height (28).
TEST(X32_TRANSPOSEC__4X1_SCALAR_FLOAT_4, bh_28_bw_23_ies_21_oes_17) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(28)
      .output_stride(34)
      .block_width(23)
      .block_height(28)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x1_scalar_float);
}
2054 
// Single configuration: block_width 1, block_height 4, input_stride 2, output_stride 8.
TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_4_bw_1) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(8)
      .block_width(1)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
}
2065 
// Sweeps block_height over [1, 8] and block_width over [1, 2];
// input_stride is 3x the width and output_stride is 7x the height.
TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_1_8_bw_1_2) {
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 2; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
    }
  }
}
2080 
// Single configuration: block_width 2, block_height 4, input_stride 2, output_stride 4.
TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_4_bw_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(4)
      .block_width(2)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
}
2091 
// Width sweep at block_height 4.
// NOTE(review): the width range is [2, 2), so the loop body never runs and
// this test exercises nothing as written.
TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_4_bw_2_2) {
  for (size_t width = 2; width < 2; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(4)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
  }
}
2104 
// Width sweep at block_height 8.
// NOTE(review): the width range is [2, 2), so the loop body never runs and
// this test exercises nothing as written.
TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_8_bw_2_2) {
  for (size_t width = 2; width < 2; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(8)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
  }
}
2117 
// Single configuration: block_width 1, block_height 8, input_stride 1, output_stride 16.
TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_8_bw_1) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(1)
      .output_stride(16)
      .block_width(1)
      .block_height(8)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
}
2128 
// Sweeps block_height over [5, 8) with output_stride equal to the height.
// NOTE(review): the test name says bw_1, but block_width is 4 (input_stride 18).
TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_5_8_bw_1) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(18)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
  }
}
2141 
// Sweeps block_height over [5, 8) at block_width 2; output_stride equals the height.
TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_5_8_bw_2) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(2)
        .output_stride(height)
        .block_width(2)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
  }
}
2154 
// Combined sweep: block_height over [5, 8), block_width over [2, 2).
// NOTE(review): the inner width range is empty, so the tester is never
// invoked and this test exercises nothing as written.
TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_5_8_bw_2_2) {
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 2; width < 2; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
    }
  }
}
2169 
// Single configuration with input_stride (2) larger than block_width (1); output_stride 4.
TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_4_bw_1_is_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(4)
      .block_width(1)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
}
2180 
// Single configuration with output_stride (8) larger than block_height (4); input_stride 1.
TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_4_bw_1_os_8) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(1)
      .output_stride(8)
      .block_width(1)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
}
2191 
// Single configuration with both strides enlarged: input_stride 2, output_stride 8
// for block_width 1, block_height 4.
TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_4_bw_1_is_2_os_8) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(8)
      .block_width(1)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
}
2202 
// Single configuration that also sets input_element_stride:
// block_width 19, block_height 68, input_element_stride 15.
TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_68_bw_19_ies_15) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(19)
      .output_stride(68)
      .block_width(19)
      .block_height(68)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
}
2214 
// Single configuration that also sets output_element_stride:
// block_width 5, block_height 12, output_element_stride 15.
TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_12_bw_5_oes_15) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(5)
      .output_stride(12)
      .block_width(5)
      .block_height(12)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
}
2226 
// Single configuration setting both element strides (input 21, output 17);
// input_stride (28) and output_stride (34) exceed block_width (23) / block_height (28).
TEST(X32_TRANSPOSEC__4X1_SCALAR_INT_4, bh_28_bw_23_ies_21_oes_17) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(28)
      .output_stride(34)
      .block_width(23)
      .block_height(28)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x1_scalar_int);
}
2239 
// Single configuration: block_width 2, block_height 4, input_stride 4, output_stride 8.
TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_4_bw_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(8)
      .block_width(2)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
}
2250 
// Sweeps block_height over [1, 8] and block_width over [1, 4];
// input_stride is 3x the width and output_stride is 7x the height.
TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_1_8_bw_1_4) {
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
    }
  }
}
2265 
// Single configuration: block_width 4, block_height 4, input_stride 4, output_stride 4.
TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_4_bw_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(4)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
}
2276 
// Sweeps block_width over [3, 4) (i.e. only width 3) at block_height 4;
// input_stride equals the width, output_stride is 8.
TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_4_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(4)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
  }
}
2289 
// Sweeps block_width over [3, 4) (i.e. only width 3) at block_height 8;
// input_stride equals the width, output_stride is 8.
TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_8_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(8)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
  }
}
2302 
// Single configuration: block_width 2, block_height 8, input_stride 2, output_stride 16.
TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_8_bw_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(16)
      .block_width(2)
      .block_height(8)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
}
2313 
// Sweeps block_height over [5, 8) with output_stride equal to the height.
// NOTE(review): the test name says bw_2, but block_width is 5 (input_stride 19).
TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_5_8_bw_2) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(19)
        .output_stride(height)
        .block_width(5)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
  }
}
2326 
// Sweeps block_height over [5, 8) at block_width 4; output_stride equals the height.
TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_5_8_bw_4) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(4)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
  }
}
2339 
// Combined sweep: block_height over [5, 8), block_width over [3, 4) (only width 3);
// each stride equals the matching block dimension.
TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_5_8_bw_3_4) {
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
    }
  }
}
2354 
// Single configuration with input_stride (4) larger than block_width (2); output_stride 4.
TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_4_bw_2_is_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
}
2365 
// Single configuration with output_stride (8) larger than block_height (4); input_stride 2.
TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_4_bw_2_os_8) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(8)
      .block_width(2)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
}
2376 
// Single configuration with both strides enlarged: input_stride 4, output_stride 8
// for block_width 2, block_height 4.
TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_4_bw_2_is_4_os_8) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(8)
      .block_width(2)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
}
2387 
// Single configuration that also sets input_element_stride:
// block_width 38, block_height 68, input_element_stride 15.
TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_68_bw_38_ies_15) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(38)
      .output_stride(68)
      .block_width(38)
      .block_height(68)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
}
2399 
// Single configuration that also sets output_element_stride:
// block_width 10, block_height 12, output_element_stride 15.
TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_12_bw_10_oes_15) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(10)
      .output_stride(12)
      .block_width(10)
      .block_height(12)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
}
2411 
// Single configuration setting both element strides (input 21, output 17);
// input_stride (51) and output_stride (34) exceed block_width (46) / block_height (28).
TEST(X32_TRANSPOSEC__4X2_SCALAR_FLOAT_4, bh_28_bw_46_ies_21_oes_17) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(51)
      .output_stride(34)
      .block_width(46)
      .block_height(28)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_float);
}
2424 
// Single configuration: block_width 2, block_height 4, input_stride 4, output_stride 8.
TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_4_bw_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(8)
      .block_width(2)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
}
2435 
// Sweeps block_height over [1, 8] and block_width over [1, 4];
// input_stride is 3x the width and output_stride is 7x the height.
TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_1_8_bw_1_4) {
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
    }
  }
}
2450 
// Single configuration: block_width 4, block_height 4, input_stride 4, output_stride 4.
TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_4_bw_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(4)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
}
2461 
// Sweeps block_width over [3, 4) (i.e. only width 3) at block_height 4;
// input_stride equals the width, output_stride is 8.
TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_4_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(4)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
  }
}
2474 
// Sweeps block_width over [3, 4) (i.e. only width 3) at block_height 8;
// input_stride equals the width, output_stride is 8.
TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_8_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(8)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
  }
}
2487 
// Single configuration: block_width 2, block_height 8, input_stride 2, output_stride 16.
TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_8_bw_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(16)
      .block_width(2)
      .block_height(8)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
}
2498 
// Sweeps block_height over [5, 8) with output_stride equal to the height.
// NOTE(review): the test name says bw_2, but block_width is 5 (input_stride 19).
TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_5_8_bw_2) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(19)
        .output_stride(height)
        .block_width(5)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
  }
}
2511 
// Sweeps block_height over [5, 8) at block_width 4; output_stride equals the height.
TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_5_8_bw_4) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(4)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
  }
}
2524 
// Combined sweep: block_height over [5, 8), block_width over [3, 4) (only width 3);
// each stride equals the matching block dimension.
TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_5_8_bw_3_4) {
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
    }
  }
}
2539 
// Single configuration with input_stride (4) larger than block_width (2); output_stride 4.
TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_4_bw_2_is_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
}
2550 
// Single configuration with output_stride (8) larger than block_height (4); input_stride 2.
TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_4_bw_2_os_8) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(8)
      .block_width(2)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
}
2561 
// Single configuration with both strides enlarged: input_stride 4, output_stride 8
// for block_width 2, block_height 4.
TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_4_bw_2_is_4_os_8) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(8)
      .block_width(2)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
}
2572 
// Single configuration that also sets input_element_stride:
// block_width 38, block_height 68, input_element_stride 15.
TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_68_bw_38_ies_15) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(38)
      .output_stride(68)
      .block_width(38)
      .block_height(68)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
}
2584 
// Single configuration that also sets output_element_stride:
// block_width 10, block_height 12, output_element_stride 15.
TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_12_bw_10_oes_15) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(10)
      .output_stride(12)
      .block_width(10)
      .block_height(12)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
}
2596 
// Single configuration setting both element strides (input 21, output 17);
// input_stride (51) and output_stride (34) exceed block_width (46) / block_height (28).
TEST(X32_TRANSPOSEC__4X2_SCALAR_INT_4, bh_28_bw_46_ies_21_oes_17) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(51)
      .output_stride(34)
      .block_width(46)
      .block_height(28)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x2_scalar_int);
}
2609 
// Single configuration: block_width 4, block_height 4, input_stride 8, output_stride 8.
TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_4_bw_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
}
2620 
// Sweeps block_height over [1, 8] and block_width over [1, 8];
// input_stride is 3x the width and output_stride is 7x the height.
TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_1_8_bw_1_8) {
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
    }
  }
}
2635 
// Single configuration: block_width 8, block_height 4, input_stride 8, output_stride 4.
TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_4_bw_8) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(8)
      .output_stride(4)
      .block_width(8)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
}
2646 
// Sweeps block_width over [5, 8) at block_height 4;
// input_stride equals the width, output_stride is 8.
TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_4_bw_5_8) {
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(4)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
  }
}
2659 
// Sweeps block_width over [5, 8) at block_height 8;
// input_stride equals the width, output_stride is 8.
TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_8_bw_5_8) {
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(8)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
  }
}
2672 
// Single configuration: block_width 4, block_height 8, input_stride 4, output_stride 16.
TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_8_bw_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(16)
      .block_width(4)
      .block_height(8)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
}
2683 
// Sweeps block_height over [5, 8) with output_stride equal to the height.
// NOTE(review): the test name says bw_4, but block_width is 7 (input_stride 21).
TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_5_8_bw_4) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(21)
        .output_stride(height)
        .block_width(7)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
  }
}
2696 
// Sweeps block_height over [5, 8) at block_width 8; output_stride equals the height.
TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_5_8_bw_8) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(8)
        .output_stride(height)
        .block_width(8)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
  }
}
2709 
// Combined sweep: block_height and block_width each over [5, 8);
// each stride equals the matching block dimension.
TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_5_8_bw_5_8) {
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
    }
  }
}
2724 
// Single configuration with input_stride (8) larger than block_width (4); output_stride 4.
TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_4_bw_4_is_8) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(8)
      .output_stride(4)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
}
2735 
// Single configuration with output_stride (8) larger than block_height (4); input_stride 4.
TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_4_bw_4_os_8) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
}
2746 
// Single configuration with both strides enlarged: input_stride 8, output_stride 8
// for block_width 4, block_height 4.
TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_4_bw_4_is_8_os_8) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
}
2757 
// Single configuration that also sets input_element_stride:
// block_width 76, block_height 68, input_element_stride 15.
TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_68_bw_76_ies_15) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(76)
      .output_stride(68)
      .block_width(76)
      .block_height(68)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
}
2769 
// Single configuration that also sets output_element_stride:
// block_width 20, block_height 12, output_element_stride 15.
TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_12_bw_20_oes_15) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(20)
      .output_stride(12)
      .block_width(20)
      .block_height(12)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
}
2781 
// Single configuration setting both element strides (input 21, output 17);
// input_stride (97) and output_stride (34) exceed block_width (92) / block_height (28).
TEST(X32_TRANSPOSEC__4X4_SCALAR_FLOAT_4, bh_28_bw_92_ies_21_oes_17) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(97)
      .output_stride(34)
      .block_width(92)
      .block_height(28)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_scalar_float);
}
2794 
// Single configuration: block_width 4, block_height 4, input_stride 8, output_stride 8.
TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
}
2805 
// Sweeps block_height over [1, 8] and block_width over [1, 8];
// input_stride is 3x the width and output_stride is 7x the height.
TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_1_8_bw_1_8) {
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
    }
  }
}
2820 
// Block of width 8 and height 4; input stride 8, output stride 4.
TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_8) {
  TransposeMicrokernelTester tester;
  tester.input_stride(8);
  tester.output_stride(4);
  tester.block_width(8);
  tester.block_height(4);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
}
2831 
// Height fixed at 4; width takes 5, 6, 7 and is also used as the input stride.
TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_5_8) {
  for (size_t bw = 5; bw <= 7; ++bw) {
    TransposeMicrokernelTester tester;
    tester.input_stride(bw);
    tester.output_stride(8);
    tester.block_width(bw);
    tester.block_height(4);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
  }
}
2844 
// Height fixed at 8; width takes 5, 6, 7 and is also used as the input stride.
TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_8_bw_5_8) {
  for (size_t bw = 5; bw <= 7; ++bw) {
    TransposeMicrokernelTester tester;
    tester.input_stride(bw);
    tester.output_stride(8);
    tester.block_width(bw);
    tester.block_height(8);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
  }
}
2857 
// Block of width 4 and height 8; input stride 4, output stride 16.
TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_8_bw_4) {
  TransposeMicrokernelTester tester;
  tester.input_stride(4);
  tester.output_stride(16);
  tester.block_width(4);
  tester.block_height(8);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
}
2868 
// Height takes 5, 6, 7 and is also used as the output stride.
// NOTE(review): the test name says bw_4, but block_width is 7 (input stride 21)
// -- confirm this is what the generator intends.
TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_5_8_bw_4) {
  for (size_t bh = 5; bh <= 7; ++bh) {
    TransposeMicrokernelTester tester;
    tester.input_stride(21);
    tester.output_stride(bh);
    tester.block_width(7);
    tester.block_height(bh);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
  }
}
2881 
// Width fixed at 8 (input stride 8); height takes 5, 6, 7 and is also the output stride.
TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_5_8_bw_8) {
  for (size_t bh = 5; bh <= 7; ++bh) {
    TransposeMicrokernelTester tester;
    tester.input_stride(8);
    tester.output_stride(bh);
    tester.block_width(8);
    tester.block_height(bh);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
  }
}
2894 
// Heights and widths each take 5, 6, 7; input stride equals the width and
// output stride equals the height.
TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_5_8_bw_5_8) {
  for (size_t bh = 5; bh <= 7; ++bh) {
    for (size_t bw = 5; bw <= 7; ++bw) {
      TransposeMicrokernelTester tester;
      tester.input_stride(bw);
      tester.output_stride(bh);
      tester.block_width(bw);
      tester.block_height(bh);
      tester.element_size(4);
      tester.iterations(1);
      tester.Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
    }
  }
}
2909 
// Block of width 4 and height 4; input stride 8, output stride 4.
TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8) {
  TransposeMicrokernelTester tester;
  tester.input_stride(8);
  tester.output_stride(4);
  tester.block_width(4);
  tester.block_height(4);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
}
2920 
// Block of width 4 and height 4; input stride 4, output stride 8.
TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_os_8) {
  TransposeMicrokernelTester tester;
  tester.input_stride(4);
  tester.output_stride(8);
  tester.block_width(4);
  tester.block_height(4);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
}
2931 
// Block of width 4 and height 4; input stride 8, output stride 8.
TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_4_bw_4_is_8_os_8) {
  TransposeMicrokernelTester tester;
  tester.input_stride(8);
  tester.output_stride(8);
  tester.block_width(4);
  tester.block_height(4);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
}
2942 
// Block of width 76 and height 68 with an input element stride of 15.
TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_68_bw_76_ies_15) {
  TransposeMicrokernelTester tester;
  tester.input_stride(76);
  tester.output_stride(68);
  tester.block_width(76);
  tester.block_height(68);
  tester.element_size(4);
  tester.input_element_stride(15);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
}
2954 
// Block of width 20 and height 12 with an output element stride of 15.
TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_12_bw_20_oes_15) {
  TransposeMicrokernelTester tester;
  tester.input_stride(20);
  tester.output_stride(12);
  tester.block_width(20);
  tester.block_height(12);
  tester.element_size(4);
  tester.output_element_stride(15);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
}
2966 
// Block of width 92 and height 28; input element stride 21, output element stride 17.
TEST(X32_TRANSPOSEC__4X4_SCALAR_INT_4, bh_28_bw_92_ies_21_oes_17) {
  TransposeMicrokernelTester tester;
  tester.input_stride(97);
  tester.output_stride(34);
  tester.block_width(92);
  tester.block_height(28);
  tester.element_size(4);
  tester.input_element_stride(21);
  tester.output_element_stride(17);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_scalar_int);
}
2979 
2980 #if XNN_ARCH_ARM64
// Block of width 4 and height 4; input and output strides are both 8.
TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester;
  tester.input_stride(8);
  tester.output_stride(8);
  tester.block_width(4);
  tester.block_height(4);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
}
2992 
// Sweeps block heights and widths 1 through 8; input stride is 3x the width,
// output stride is 7x the height.
TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 1; bh < 9; ++bh) {
    for (size_t bw = 1; bw < 9; ++bw) {
      TransposeMicrokernelTester tester;
      tester.input_stride(bw * 3);
      tester.output_stride(bh * 7);
      tester.block_width(bw);
      tester.block_height(bh);
      tester.element_size(4);
      tester.iterations(1);
      tester.Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
    }
  }
}
3008 
// Block of width 8 and height 4; input stride 8, output stride 4.
TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester;
  tester.input_stride(8);
  tester.output_stride(4);
  tester.block_width(8);
  tester.block_height(4);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
}
3020 
// Height fixed at 4; width takes 5, 6, 7 and is also used as the input stride.
TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 5; bw <= 7; ++bw) {
    TransposeMicrokernelTester tester;
    tester.input_stride(bw);
    tester.output_stride(8);
    tester.block_width(bw);
    tester.block_height(4);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
  }
}
3034 
// Height fixed at 8; width takes 5, 6, 7 and is also used as the input stride.
TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 5; bw <= 7; ++bw) {
    TransposeMicrokernelTester tester;
    tester.input_stride(bw);
    tester.output_stride(8);
    tester.block_width(bw);
    tester.block_height(8);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
  }
}
3048 
// Block of width 4 and height 8; input stride 4, output stride 16.
TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester;
  tester.input_stride(4);
  tester.output_stride(16);
  tester.block_width(4);
  tester.block_height(8);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
}
3060 
// Height takes 5, 6, 7 and is also used as the output stride.
// NOTE(review): the test name says bw_4, but block_width is 7 (input stride 21)
// -- confirm this is what the generator intends.
TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh <= 7; ++bh) {
    TransposeMicrokernelTester tester;
    tester.input_stride(21);
    tester.output_stride(bh);
    tester.block_width(7);
    tester.block_height(bh);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
  }
}
3074 
// Width fixed at 8 (input stride 8); height takes 5, 6, 7 and is also the output stride.
TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh <= 7; ++bh) {
    TransposeMicrokernelTester tester;
    tester.input_stride(8);
    tester.output_stride(bh);
    tester.block_width(8);
    tester.block_height(bh);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
  }
}
3088 
// Heights and widths each take 5, 6, 7; input stride equals the width and
// output stride equals the height.
TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh <= 7; ++bh) {
    for (size_t bw = 5; bw <= 7; ++bw) {
      TransposeMicrokernelTester tester;
      tester.input_stride(bw);
      tester.output_stride(bh);
      tester.block_width(bw);
      tester.block_height(bh);
      tester.element_size(4);
      tester.iterations(1);
      tester.Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
    }
  }
}
3104 
// Block of width 4 and height 4; input stride 8, output stride 4.
TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester;
  tester.input_stride(8);
  tester.output_stride(4);
  tester.block_width(4);
  tester.block_height(4);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
}
3116 
// Block of width 4 and height 4; input stride 4, output stride 8.
TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester;
  tester.input_stride(4);
  tester.output_stride(8);
  tester.block_width(4);
  tester.block_height(4);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
}
3128 
// Block of width 4 and height 4; input stride 8, output stride 8.
TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester;
  tester.input_stride(8);
  tester.output_stride(8);
  tester.block_width(4);
  tester.block_height(4);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
}
3140 
// Block of width 76 and height 68 with an input element stride of 15.
TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester;
  tester.input_stride(76);
  tester.output_stride(68);
  tester.block_width(76);
  tester.block_height(68);
  tester.element_size(4);
  tester.input_element_stride(15);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
}
3153 
// Block of width 20 and height 12 with an output element stride of 15.
TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester;
  tester.input_stride(20);
  tester.output_stride(12);
  tester.block_width(20);
  tester.block_height(12);
  tester.element_size(4);
  tester.output_element_stride(15);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
}
3166 
// Block of width 92 and height 28; input element stride 21, output element stride 17.
TEST(X32_TRANSPOSEC__4X4_AARCH64_NEON_TBL_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester tester;
  tester.input_stride(97);
  tester.output_stride(34);
  tester.block_width(92);
  tester.block_height(28);
  tester.element_size(4);
  tester.input_element_stride(21);
  tester.output_element_stride(17);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl);
}
3180 #endif  // XNN_ARCH_ARM64
3181 
3182 
3183 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Block of width 4 and height 4; input and output strides are both 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(8);
  tester.output_stride(8);
  tester.block_width(4);
  tester.block_height(4);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
}
3195 
// Sweeps block heights and widths 1 through 8; input stride is 3x the width,
// output stride is 7x the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 1; bh < 9; ++bh) {
    for (size_t bw = 1; bw < 9; ++bw) {
      TransposeMicrokernelTester tester;
      tester.input_stride(bw * 3);
      tester.output_stride(bh * 7);
      tester.block_width(bw);
      tester.block_height(bh);
      tester.element_size(4);
      tester.iterations(1);
      tester.Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
    }
  }
}
3211 
// Block of width 8 and height 4; input stride 8, output stride 4.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_4_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(8);
  tester.output_stride(4);
  tester.block_width(8);
  tester.block_height(4);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
}
3223 
// Height fixed at 4; width takes 5, 6, 7 and is also used as the input stride.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_4_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bw = 5; bw <= 7; ++bw) {
    TransposeMicrokernelTester tester;
    tester.input_stride(bw);
    tester.output_stride(8);
    tester.block_width(bw);
    tester.block_height(4);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
  }
}
3237 
// Height fixed at 8; width takes 5, 6, 7 and is also used as the input stride.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bw = 5; bw <= 7; ++bw) {
    TransposeMicrokernelTester tester;
    tester.input_stride(bw);
    tester.output_stride(8);
    tester.block_width(bw);
    tester.block_height(8);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
  }
}
3251 
// Block of width 4 and height 8; input stride 4, output stride 16.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(4);
  tester.output_stride(16);
  tester.block_width(4);
  tester.block_height(8);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
}
3263 
// Height takes 5, 6, 7 and is also used as the output stride.
// NOTE(review): the test name says bw_4, but block_width is 7 (input stride 21)
// -- confirm this is what the generator intends.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_5_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 5; bh <= 7; ++bh) {
    TransposeMicrokernelTester tester;
    tester.input_stride(21);
    tester.output_stride(bh);
    tester.block_width(7);
    tester.block_height(bh);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
  }
}
3277 
// Width fixed at 8 (input stride 8); height takes 5, 6, 7 and is also the output stride.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_5_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 5; bh <= 7; ++bh) {
    TransposeMicrokernelTester tester;
    tester.input_stride(8);
    tester.output_stride(bh);
    tester.block_width(8);
    tester.block_height(bh);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
  }
}
3291 
// Heights and widths each take 5, 6, 7; input stride equals the width and
// output stride equals the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 5; bh <= 7; ++bh) {
    for (size_t bw = 5; bw <= 7; ++bw) {
      TransposeMicrokernelTester tester;
      tester.input_stride(bw);
      tester.output_stride(bh);
      tester.block_width(bw);
      tester.block_height(bh);
      tester.element_size(4);
      tester.iterations(1);
      tester.Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
    }
  }
}
3307 
// Block of width 4 and height 4; input stride 8, output stride 4.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(8);
  tester.output_stride(4);
  tester.block_width(4);
  tester.block_height(4);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
}
3319 
// Block of width 4 and height 4; input stride 4, output stride 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(4);
  tester.output_stride(8);
  tester.block_width(4);
  tester.block_height(4);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
}
3331 
// Block of width 4 and height 4; input stride 8, output stride 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(8);
  tester.output_stride(8);
  tester.block_width(4);
  tester.block_height(4);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
}
3343 
// Block of width 76 and height 68 with an input element stride of 15.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(76);
  tester.output_stride(68);
  tester.block_width(76);
  tester.block_height(68);
  tester.element_size(4);
  tester.input_element_stride(15);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
}
3356 
// Block of width 20 and height 12 with an output element stride of 15.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(20);
  tester.output_stride(12);
  tester.block_width(20);
  tester.block_height(12);
  tester.element_size(4);
  tester.output_element_stride(15);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
}
3369 
// Block of width 92 and height 28; input element stride 21, output element stride 17.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_SSE2_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(97);
  tester.output_stride(34);
  tester.block_width(92);
  tester.block_height(28);
  tester.element_size(4);
  tester.input_element_stride(21);
  tester.output_element_stride(17);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2);
}
3383 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3384 
3385 
3386 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Block of width 4 and height 4; input and output strides are both 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(8);
  tester.output_stride(8);
  tester.block_width(4);
  tester.block_height(4);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
}
3398 
// Sweeps block heights and widths 1 through 8; input stride is 3x the width,
// output stride is 7x the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 1; bh < 9; ++bh) {
    for (size_t bw = 1; bw < 9; ++bw) {
      TransposeMicrokernelTester tester;
      tester.input_stride(bw * 3);
      tester.output_stride(bh * 7);
      tester.block_width(bw);
      tester.block_height(bh);
      tester.element_size(4);
      tester.iterations(1);
      tester.Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
    }
  }
}
3414 
// Block of width 8 and height 4; input stride 8, output stride 4.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_4_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(8);
  tester.output_stride(4);
  tester.block_width(8);
  tester.block_height(4);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
}
3426 
// Height fixed at 4; width takes 5, 6, 7 and is also used as the input stride.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_4_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bw = 5; bw <= 7; ++bw) {
    TransposeMicrokernelTester tester;
    tester.input_stride(bw);
    tester.output_stride(8);
    tester.block_width(bw);
    tester.block_height(4);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
  }
}
3440 
// Height fixed at 8; width takes 5, 6, 7 and is also used as the input stride.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bw = 5; bw <= 7; ++bw) {
    TransposeMicrokernelTester tester;
    tester.input_stride(bw);
    tester.output_stride(8);
    tester.block_width(bw);
    tester.block_height(8);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
  }
}
3454 
// Block of width 4 and height 8; input stride 4, output stride 16.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(4);
  tester.output_stride(16);
  tester.block_width(4);
  tester.block_height(8);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
}
3466 
// Height takes 5, 6, 7 and is also used as the output stride.
// NOTE(review): the test name says bw_4, but block_width is 7 (input stride 21)
// -- confirm this is what the generator intends.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_5_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 5; bh <= 7; ++bh) {
    TransposeMicrokernelTester tester;
    tester.input_stride(21);
    tester.output_stride(bh);
    tester.block_width(7);
    tester.block_height(bh);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
  }
}
3480 
// Width fixed at 8 (input stride 8); height takes 5, 6, 7 and is also the output stride.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_5_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 5; bh <= 7; ++bh) {
    TransposeMicrokernelTester tester;
    tester.input_stride(8);
    tester.output_stride(bh);
    tester.block_width(8);
    tester.block_height(bh);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
  }
}
3494 
// Heights and widths each take 5, 6, 7; input stride equals the width and
// output stride equals the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 5; bh <= 7; ++bh) {
    for (size_t bw = 5; bw <= 7; ++bw) {
      TransposeMicrokernelTester tester;
      tester.input_stride(bw);
      tester.output_stride(bh);
      tester.block_width(bw);
      tester.block_height(bh);
      tester.element_size(4);
      tester.iterations(1);
      tester.Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
    }
  }
}
3510 
// Block of width 4 and height 4; input stride 8, output stride 4.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(8);
  tester.output_stride(4);
  tester.block_width(4);
  tester.block_height(4);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
}
3522 
// Block of width 4 and height 4; input stride 4, output stride 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(4);
  tester.output_stride(8);
  tester.block_width(4);
  tester.block_height(4);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
}
3534 
// Block of width 4 and height 4; input stride 8, output stride 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(8);
  tester.output_stride(8);
  tester.block_width(4);
  tester.block_height(4);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
}
3546 
// Block of width 76 and height 68 with an input element stride of 15.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(76);
  tester.output_stride(68);
  tester.block_width(76);
  tester.block_height(68);
  tester.element_size(4);
  tester.input_element_stride(15);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
}
3559 
// Block of width 20 and height 12 with an output element stride of 15.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(20);
  tester.output_stride(12);
  tester.block_width(20);
  tester.block_height(12);
  tester.element_size(4);
  tester.output_element_stride(15);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
}
3572 
// Block of width 92 and height 28; input element stride 21, output element stride 17.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_SSE2_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(97);
  tester.output_stride(34);
  tester.block_width(92);
  tester.block_height(28);
  tester.element_size(4);
  tester.input_element_stride(21);
  tester.output_element_stride(17);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2);
}
3586 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3587 
3588 
3589 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Block of width 4 and height 4; input and output strides are both 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(8);
  tester.output_stride(8);
  tester.block_width(4);
  tester.block_height(4);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
}
3601 
// Sweeps block heights and widths 1 through 8; input stride is 3x the width,
// output stride is 7x the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 1; bh < 9; ++bh) {
    for (size_t bw = 1; bw < 9; ++bw) {
      TransposeMicrokernelTester tester;
      tester.input_stride(bw * 3);
      tester.output_stride(bh * 7);
      tester.block_width(bw);
      tester.block_height(bh);
      tester.element_size(4);
      tester.iterations(1);
      tester.Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
    }
  }
}
3617 
// Block of width 8 and height 4; input stride 8, output stride 4.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_4_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(8);
  tester.output_stride(4);
  tester.block_width(8);
  tester.block_height(4);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
}
3629 
// Height fixed at 4; width takes 5, 6, 7 and is also used as the input stride.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_4_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bw = 5; bw <= 7; ++bw) {
    TransposeMicrokernelTester tester;
    tester.input_stride(bw);
    tester.output_stride(8);
    tester.block_width(bw);
    tester.block_height(4);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
  }
}
3643 
// Height fixed at 8; width takes 5, 6, 7 and is also used as the input stride.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bw = 5; bw <= 7; ++bw) {
    TransposeMicrokernelTester tester;
    tester.input_stride(bw);
    tester.output_stride(8);
    tester.block_width(bw);
    tester.block_height(8);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
  }
}
3657 
// Block of width 4 and height 8; input stride 4, output stride 16.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester tester;
  tester.input_stride(4);
  tester.output_stride(16);
  tester.block_width(4);
  tester.block_height(8);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
}
3669 
// Height takes 5, 6, 7 and is also used as the output stride.
// NOTE(review): the test name says bw_4, but block_width is 7 (input stride 21)
// -- confirm this is what the generator intends.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_5_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 5; bh <= 7; ++bh) {
    TransposeMicrokernelTester tester;
    tester.input_stride(21);
    tester.output_stride(bh);
    tester.block_width(7);
    tester.block_height(bh);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
  }
}
3683 
// Width fixed at 8 (input stride 8); height takes 5, 6, 7 and is also the output stride.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_5_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 5; bh <= 7; ++bh) {
    TransposeMicrokernelTester tester;
    tester.input_stride(8);
    tester.output_stride(bh);
    tester.block_width(8);
    tester.block_height(bh);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
  }
}
3697 
// Heights and widths each take 5, 6, 7; input stride equals the width and
// output stride equals the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 5; bh <= 7; ++bh) {
    for (size_t bw = 5; bw <= 7; ++bw) {
      TransposeMicrokernelTester tester;
      tester.input_stride(bw);
      tester.output_stride(bh);
      tester.block_width(bw);
      tester.block_height(bh);
      tester.element_size(4);
      tester.iterations(1);
      tester.Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
    }
  }
}
3713 
// 4x4 block with input_stride 8 and output_stride 4.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(4)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
}
3725 
// 4x4 block with input_stride 4 and output_stride 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
}
3737 
// 4x4 block with input_stride 8 and output_stride 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
}
3749 
// Width-76, height-68 block with strides matching those dimensions and
// input_element_stride set to 15 (element_size is 4).
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(76)
    .output_stride(68)
    .block_width(76)
    .block_height(68)
    .element_size(4)
    .input_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
}
3762 
// Width-20, height-12 block with strides matching those dimensions and
// output_element_stride set to 15 (element_size is 4).
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(20)
    .output_stride(12)
    .block_width(20)
    .block_height(12)
    .element_size(4)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
}
3775 
// Width-92, height-28 block with input_stride 97, output_stride 34,
// input_element_stride 21 and output_element_stride 17 (element_size is 4).
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_SSE2_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(97)
    .output_stride(34)
    .block_width(92)
    .block_height(28)
    .element_size(4)
    .input_element_stride(21)
    .output_element_stride(17)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2);
}
3789 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3790 
3791 
3792 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 4x4 block (block_width 4, block_height 4) with input_stride and
// output_stride both set to 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
}
3804 
// Sweeps every height/width pair from 1 through 8 inclusive, with
// input_stride = 3 * width and output_stride = 7 * height.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width * 3)
        .output_stride(height * 7)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
    }
  }
}
3820 
// Width-8, height-4 block; input_stride 8 and output_stride 4.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_4_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(4)
    .block_width(8)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
}
3832 
// Height fixed at 4; widths 5, 6 and 7 are run (the loop bound excludes 8).
// input_stride follows the width, output_stride stays at 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_4_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(8)
      .block_width(width)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
  }
}
3846 
// Height fixed at 8; widths 5, 6 and 7 are run (the loop bound excludes 8).
// input_stride follows the width, output_stride stays at 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(8)
      .block_width(width)
      .block_height(8)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
  }
}
3860 
// Width-4, height-8 block; input_stride 4 and output_stride 16.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(16)
    .block_width(4)
    .block_height(8)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
}
3872 
// Heights 5, 6 and 7 are run (the loop bound excludes 8); output_stride
// follows the height.
// NOTE(review): the test name says bw_4, yet block_width is 7 and
// input_stride is 21. Values are kept as found -- confirm against the test
// generator before changing them.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_5_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(21)
      .output_stride(height)
      .block_width(7)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
  }
}
3886 
// Width fixed at 8 with input_stride 8; heights 5, 6 and 7 are run (the loop
// bound excludes 8) and output_stride follows the height.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_5_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(height)
      .block_width(8)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
  }
}
3900 
// Every height/width pair from 5 through 7 (both loop bounds exclude 8), with
// input_stride equal to the width and output_stride equal to the height.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
    }
  }
}
3916 
// 4x4 block with input_stride 8 and output_stride 4.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(4)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
}
3928 
// 4x4 block with input_stride 4 and output_stride 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
}
3940 
// 4x4 block with input_stride 8 and output_stride 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
}
3952 
// Width-76, height-68 block with strides matching those dimensions and
// input_element_stride set to 15 (element_size is 4).
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(76)
    .output_stride(68)
    .block_width(76)
    .block_height(68)
    .element_size(4)
    .input_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
}
3965 
// Width-20, height-12 block with strides matching those dimensions and
// output_element_stride set to 15 (element_size is 4).
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(20)
    .output_stride(12)
    .block_width(20)
    .block_height(12)
    .element_size(4)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
}
3978 
// Width-92, height-28 block with input_stride 97, output_stride 34,
// input_element_stride 21 and output_element_stride 17 (element_size is 4).
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_SSE2_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(97)
    .output_stride(34)
    .block_width(92)
    .block_height(28)
    .element_size(4)
    .input_element_stride(21)
    .output_element_stride(17)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2);
}
3992 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3993 
3994 
3995 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 4x4 block (block_width 4, block_height 4) with input_stride and
// output_stride both set to 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2);
}
4007 
// Sweeps every height/width pair from 1 through 8 inclusive, with
// input_stride = 3 * width and output_stride = 7 * height.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width * 3)
        .output_stride(height * 7)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2);
    }
  }
}
4023 
// Width-8, height-4 block; input_stride 8 and output_stride 4.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_4_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(4)
    .block_width(8)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2);
}
4035 
// Height fixed at 4; widths 5, 6 and 7 are run (the loop bound excludes 8).
// input_stride follows the width, output_stride stays at 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_4_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(8)
      .block_width(width)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2);
  }
}
4049 
// Height fixed at 8; widths 5, 6 and 7 are run (the loop bound excludes 8).
// input_stride follows the width, output_stride stays at 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(8)
      .block_width(width)
      .block_height(8)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2);
  }
}
4063 
// Width-4, height-8 block; input_stride 4 and output_stride 16.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(16)
    .block_width(4)
    .block_height(8)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2);
}
4075 
// Heights 5, 6 and 7 are run (the loop bound excludes 8); output_stride
// follows the height.
// NOTE(review): the test name says bw_4, yet block_width is 7 and
// input_stride is 21. Values are kept as found -- confirm against the test
// generator before changing them.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_5_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(21)
      .output_stride(height)
      .block_width(7)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2);
  }
}
4089 
// Width fixed at 8 with input_stride 8; heights 5, 6 and 7 are run (the loop
// bound excludes 8) and output_stride follows the height.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_5_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(height)
      .block_width(8)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2);
  }
}
4103 
// Every height/width pair from 5 through 7 (both loop bounds exclude 8), with
// input_stride equal to the width and output_stride equal to the height.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2);
    }
  }
}
4119 
// 4x4 block with input_stride 8 and output_stride 4.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(4)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2);
}
4131 
// 4x4 block with input_stride 4 and output_stride 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2);
}
4143 
// 4x4 block with input_stride 8 and output_stride 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2);
}
4155 
// Width-76, height-68 block with strides matching those dimensions and
// input_element_stride set to 15 (element_size is 4).
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(76)
    .output_stride(68)
    .block_width(76)
    .block_height(68)
    .element_size(4)
    .input_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2);
}
4168 
// Width-20, height-12 block with strides matching those dimensions and
// output_element_stride set to 15 (element_size is 4).
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(20)
    .output_stride(12)
    .block_width(20)
    .block_height(12)
    .element_size(4)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2);
}
4181 
// Width-92, height-28 block with input_stride 97, output_stride 34,
// input_element_stride 21 and output_element_stride 17 (element_size is 4).
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_SSE2_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(97)
    .output_stride(34)
    .block_width(92)
    .block_height(28)
    .element_size(4)
    .input_element_stride(21)
    .output_element_stride(17)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2);
}
4195 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4196 
4197 
4198 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 4x4 block (block_width 4, block_height 4) with input_stride and
// output_stride both set to 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2);
}
4210 
// Sweeps every height/width pair from 1 through 8 inclusive, with
// input_stride = 3 * width and output_stride = 7 * height.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width * 3)
        .output_stride(height * 7)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2);
    }
  }
}
4226 
// Width-8, height-4 block; input_stride 8 and output_stride 4.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_4_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(4)
    .block_width(8)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2);
}
4238 
// Height fixed at 4; widths 5, 6 and 7 are run (the loop bound excludes 8).
// input_stride follows the width, output_stride stays at 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_4_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(8)
      .block_width(width)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2);
  }
}
4252 
// Height fixed at 8; widths 5, 6 and 7 are run (the loop bound excludes 8).
// input_stride follows the width, output_stride stays at 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(8)
      .block_width(width)
      .block_height(8)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2);
  }
}
4266 
// Width-4, height-8 block; input_stride 4 and output_stride 16.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(16)
    .block_width(4)
    .block_height(8)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2);
}
4278 
// Heights 5, 6 and 7 are run (the loop bound excludes 8); output_stride
// follows the height.
// NOTE(review): the test name says bw_4, yet block_width is 7 and
// input_stride is 21. Values are kept as found -- confirm against the test
// generator before changing them.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_5_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(21)
      .output_stride(height)
      .block_width(7)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2);
  }
}
4292 
// Width fixed at 8 with input_stride 8; heights 5, 6 and 7 are run (the loop
// bound excludes 8) and output_stride follows the height.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_5_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(height)
      .block_width(8)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2);
  }
}
4306 
// Every height/width pair from 5 through 7 (both loop bounds exclude 8), with
// input_stride equal to the width and output_stride equal to the height.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2);
    }
  }
}
4322 
// 4x4 block with input_stride 8 and output_stride 4.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(4)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2);
}
4334 
// 4x4 block with input_stride 4 and output_stride 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2);
}
4346 
// 4x4 block with input_stride 8 and output_stride 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2);
}
4358 
// Width-76, height-68 block with strides matching those dimensions and
// input_element_stride set to 15 (element_size is 4).
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(76)
    .output_stride(68)
    .block_width(76)
    .block_height(68)
    .element_size(4)
    .input_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2);
}
4371 
// Width-20, height-12 block with strides matching those dimensions and
// output_element_stride set to 15 (element_size is 4).
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(20)
    .output_stride(12)
    .block_width(20)
    .block_height(12)
    .element_size(4)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2);
}
4384 
// Width-92, height-28 block with input_stride 97, output_stride 34,
// input_element_stride 21 and output_element_stride 17 (element_size is 4).
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_SSE2_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
    .input_stride(97)
    .output_stride(34)
    .block_width(92)
    .block_height(28)
    .element_size(4)
    .input_element_stride(21)
    .output_element_stride(17)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2);
}
4398 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4399 
4400 
4401 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 4x4 block (block_width 4, block_height 4) with input_stride and
// output_stride both set to 8.
TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_4_bw_4) {
  TEST_REQUIRES_X86_SSE;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_sse);
}
4413 
// Sweeps every height/width pair from 1 through 8 inclusive, with
// input_stride = 3 * width and output_stride = 7 * height.
TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_X86_SSE;
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width * 3)
        .output_stride(height * 7)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_sse);
    }
  }
}
4429 
// Width-8, height-4 block; input_stride 8 and output_stride 4.
TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_4_bw_8) {
  TEST_REQUIRES_X86_SSE;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(4)
    .block_width(8)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_sse);
}
4441 
// Height fixed at 4; widths 5, 6 and 7 are run (the loop bound excludes 8).
// input_stride follows the width, output_stride stays at 8.
TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_4_bw_5_8) {
  TEST_REQUIRES_X86_SSE;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(8)
      .block_width(width)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_sse);
  }
}
4455 
// Height fixed at 8; widths 5, 6 and 7 are run (the loop bound excludes 8).
// input_stride follows the width, output_stride stays at 8.
TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(8)
      .block_width(width)
      .block_height(8)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_sse);
  }
}
4469 
// Width-4, height-8 block; input_stride 4 and output_stride 16.
TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_8_bw_4) {
  TEST_REQUIRES_X86_SSE;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(16)
    .block_width(4)
    .block_height(8)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_sse);
}
4481 
// Heights 5, 6 and 7 are run (the loop bound excludes 8); output_stride
// follows the height.
// NOTE(review): the test name says bw_4, yet block_width is 7 and
// input_stride is 21. Values are kept as found -- confirm against the test
// generator before changing them.
TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_5_8_bw_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(21)
      .output_stride(height)
      .block_width(7)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_sse);
  }
}
4495 
// Width fixed at 8 with input_stride 8; heights 5, 6 and 7 are run (the loop
// bound excludes 8) and output_stride follows the height.
TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_5_8_bw_8) {
  TEST_REQUIRES_X86_SSE;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(height)
      .block_width(8)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_sse);
  }
}
4509 
// Every height/width pair from 5 through 7 (both loop bounds exclude 8), with
// input_stride equal to the width and output_stride equal to the height.
TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE;
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_sse);
    }
  }
}
4525 
// 4x4 block with input_stride 8 and output_stride 4.
TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_X86_SSE;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(4)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_sse);
}
4537 
// 4x4 block with input_stride 4 and output_stride 8.
TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_X86_SSE;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_sse);
}
4549 
// 4x4 block with input_stride 8 and output_stride 8.
TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_X86_SSE;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_sse);
}
4561 
// Width-76, height-68 block with strides matching those dimensions and
// input_element_stride set to 15 (element_size is 4).
TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_X86_SSE;
  TransposeMicrokernelTester()
    .input_stride(76)
    .output_stride(68)
    .block_width(76)
    .block_height(68)
    .element_size(4)
    .input_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_sse);
}
4574 
// Width-20, height-12 block with strides matching those dimensions and
// output_element_stride set to 15 (element_size is 4).
TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_X86_SSE;
  TransposeMicrokernelTester()
    .input_stride(20)
    .output_stride(12)
    .block_width(20)
    .block_height(12)
    .element_size(4)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_sse);
}
4587 
// Block of height 28 and width 92 with element strides of 21 (input) and
// 17 (output); input stride 97, output stride 34.
TEST(X32_TRANSPOSEC__4X4_SSE_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_X86_SSE;
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_sse;
  TransposeMicrokernelTester()
      .input_stride(97).output_stride(34)
      .block_width(92).block_height(28)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(ukernel);
}
4601 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4602 
4603 
4604 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Baseline case: one 4x4 block with input and output strides both set to 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_4_bw_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(ukernel);
}
4615 
// Sweeps every block height and width from 1 through 8; the input stride is
// three times the width and the output stride is seven times the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_1_8_bw_1_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd;
  for (size_t height = 1; height <= 8; height++) {
    for (size_t width = 1; width <= 8; width++) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(ukernel);
    }
  }
}
4630 
// Block of height 4 and width 8; input stride 8, output stride 4.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_4_bw_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(8).block_height(4)
      .element_size(4).iterations(1)
      .Test(ukernel);
}
4641 
// Block height fixed at 4 while the width runs over 5, 6 and 7 (8 is excluded
// by the loop bound); the input stride equals the width, output stride is 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_4_bw_5_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd;
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(8)
        .block_width(width).block_height(4)
        .element_size(4).iterations(1)
        .Test(ukernel);
  }
}
4654 
// Block height fixed at 8 while the width runs over 5, 6 and 7; the input
// stride equals the width and the output stride stays at 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_8_bw_5_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd;
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(8)
        .block_width(width).block_height(8)
        .element_size(4).iterations(1)
        .Test(ukernel);
  }
}
4667 
// Block of height 8 and width 4; input stride 4, output stride 16.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_8_bw_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(16)
      .block_width(4).block_height(8)
      .element_size(4).iterations(1)
      .Test(ukernel);
}
4678 
// Heights 5, 6 and 7 with the output stride equal to the height. Note that,
// despite the bw_4 suffix, the block width used is 7 (input stride 21).
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_5_8_bw_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd;
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester()
        .input_stride(21).output_stride(height)
        .block_width(7).block_height(height)
        .element_size(4).iterations(1)
        .Test(ukernel);
  }
}
4691 
// Heights 5, 6 and 7 at a fixed block width of 8; the output stride equals
// the height and the input stride is 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_5_8_bw_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd;
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester()
        .input_stride(8).output_stride(height)
        .block_width(8).block_height(height)
        .element_size(4).iterations(1)
        .Test(ukernel);
  }
}
4704 
// All height/width pairs drawn from {5, 6, 7}; each stride is set equal to
// the matching block dimension (input stride = width, output stride = height).
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_5_8_bw_5_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd;
  for (size_t height = 5; height < 8; height++) {
    for (size_t width = 5; width < 8; width++) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(ukernel);
    }
  }
}
4719 
// 4x4 block read with input stride 8 and written with output stride 4.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_4_bw_4_is_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(ukernel);
}
4730 
// 4x4 block read with input stride 4 and written with output stride 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_4_bw_4_os_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(ukernel);
}
4741 
// 4x4 block with both the input stride and the output stride set to 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_4_bw_4_is_8_os_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(ukernel);
}
4752 
// Block of height 68 and width 76 with input_element_stride set to 15.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_68_bw_76_ies_15) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(76).output_stride(68)
      .block_width(76).block_height(68)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(ukernel);
}
4764 
// Block of height 12 and width 20 with output_element_stride set to 15.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_12_bw_20_oes_15) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(20).output_stride(12)
      .block_width(20).block_height(12)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(ukernel);
}
4776 
// Block of height 28 and width 92 with element strides of 21 (input) and
// 17 (output); input stride 97, output stride 34.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_WASMSIMD_4, bh_28_bw_92_ies_21_oes_17) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(97).output_stride(34)
      .block_width(92).block_height(28)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(ukernel);
}
4789 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
4790 
4791 
4792 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Baseline case: one 4x4 block with input and output strides both set to 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_4_bw_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(ukernel);
}
4803 
// Sweeps every block height and width from 1 through 8; the input stride is
// three times the width and the output stride is seven times the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_1_8_bw_1_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd;
  for (size_t height = 1; height <= 8; height++) {
    for (size_t width = 1; width <= 8; width++) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(ukernel);
    }
  }
}
4818 
// Block of height 4 and width 8; input stride 8, output stride 4.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_4_bw_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(8).block_height(4)
      .element_size(4).iterations(1)
      .Test(ukernel);
}
4829 
// Block height fixed at 4 while the width runs over 5, 6 and 7 (8 is excluded
// by the loop bound); the input stride equals the width, output stride is 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_4_bw_5_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd;
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(8)
        .block_width(width).block_height(4)
        .element_size(4).iterations(1)
        .Test(ukernel);
  }
}
4842 
// Block height fixed at 8 while the width runs over 5, 6 and 7; the input
// stride equals the width and the output stride stays at 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_8_bw_5_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd;
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(8)
        .block_width(width).block_height(8)
        .element_size(4).iterations(1)
        .Test(ukernel);
  }
}
4855 
// Block of height 8 and width 4; input stride 4, output stride 16.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_8_bw_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(16)
      .block_width(4).block_height(8)
      .element_size(4).iterations(1)
      .Test(ukernel);
}
4866 
// Heights 5, 6 and 7 with the output stride equal to the height. Note that,
// despite the bw_4 suffix, the block width used is 7 (input stride 21).
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_5_8_bw_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd;
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester()
        .input_stride(21).output_stride(height)
        .block_width(7).block_height(height)
        .element_size(4).iterations(1)
        .Test(ukernel);
  }
}
4879 
// Heights 5, 6 and 7 at a fixed block width of 8; the output stride equals
// the height and the input stride is 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_5_8_bw_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd;
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester()
        .input_stride(8).output_stride(height)
        .block_width(8).block_height(height)
        .element_size(4).iterations(1)
        .Test(ukernel);
  }
}
4892 
// All height/width pairs drawn from {5, 6, 7}; each stride is set equal to
// the matching block dimension (input stride = width, output stride = height).
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_5_8_bw_5_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd;
  for (size_t height = 5; height < 8; height++) {
    for (size_t width = 5; width < 8; width++) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(ukernel);
    }
  }
}
4907 
// 4x4 block read with input stride 8 and written with output stride 4.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_4_bw_4_is_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(ukernel);
}
4918 
// 4x4 block read with input stride 4 and written with output stride 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_4_bw_4_os_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(ukernel);
}
4929 
// 4x4 block with both the input stride and the output stride set to 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_4_bw_4_is_8_os_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(ukernel);
}
4940 
// Block of height 68 and width 76 with input_element_stride set to 15.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_68_bw_76_ies_15) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(76).output_stride(68)
      .block_width(76).block_height(68)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(ukernel);
}
4952 
// Block of height 12 and width 20 with output_element_stride set to 15.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_12_bw_20_oes_15) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(20).output_stride(12)
      .block_width(20).block_height(12)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(ukernel);
}
4964 
// Block of height 28 and width 92 with element strides of 21 (input) and
// 17 (output); input stride 97, output stride 34.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_WASMSIMD_4, bh_28_bw_92_ies_21_oes_17) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(97).output_stride(34)
      .block_width(92).block_height(28)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(ukernel);
}
4977 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
4978 
4979 
4980 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Baseline case: one 4x4 block with input and output strides both set to 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_4_bw_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(ukernel);
}
4991 
// Sweeps every block height and width from 1 through 8; the input stride is
// three times the width and the output stride is seven times the height.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_1_8_bw_1_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd;
  for (size_t height = 1; height <= 8; height++) {
    for (size_t width = 1; width <= 8; width++) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(ukernel);
    }
  }
}
5006 
// Block of height 4 and width 8; input stride 8, output stride 4.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_4_bw_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(8).block_height(4)
      .element_size(4).iterations(1)
      .Test(ukernel);
}
5017 
// Block height fixed at 4 while the width runs over 5, 6 and 7 (8 is excluded
// by the loop bound); the input stride equals the width, output stride is 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_4_bw_5_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd;
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(8)
        .block_width(width).block_height(4)
        .element_size(4).iterations(1)
        .Test(ukernel);
  }
}
5030 
// Block height fixed at 8 while the width runs over 5, 6 and 7; the input
// stride equals the width and the output stride stays at 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_8_bw_5_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd;
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(8)
        .block_width(width).block_height(8)
        .element_size(4).iterations(1)
        .Test(ukernel);
  }
}
5043 
// Block of height 8 and width 4; input stride 4, output stride 16.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_8_bw_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(16)
      .block_width(4).block_height(8)
      .element_size(4).iterations(1)
      .Test(ukernel);
}
5054 
// Heights 5, 6 and 7 with the output stride equal to the height. Note that,
// despite the bw_4 suffix, the block width used is 7 (input stride 21).
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_5_8_bw_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd;
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester()
        .input_stride(21).output_stride(height)
        .block_width(7).block_height(height)
        .element_size(4).iterations(1)
        .Test(ukernel);
  }
}
5067 
// Heights 5, 6 and 7 at a fixed block width of 8; the output stride equals
// the height and the input stride is 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_5_8_bw_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd;
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester()
        .input_stride(8).output_stride(height)
        .block_width(8).block_height(height)
        .element_size(4).iterations(1)
        .Test(ukernel);
  }
}
5080 
// All height/width pairs drawn from {5, 6, 7}; each stride is set equal to
// the matching block dimension (input stride = width, output stride = height).
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_5_8_bw_5_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd;
  for (size_t height = 5; height < 8; height++) {
    for (size_t width = 5; width < 8; width++) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(ukernel);
    }
  }
}
5095 
// 4x4 block read with input stride 8 and written with output stride 4.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_4_bw_4_is_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(ukernel);
}
5106 
// 4x4 block read with input stride 4 and written with output stride 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_4_bw_4_os_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(ukernel);
}
5117 
// 4x4 block with both the input stride and the output stride set to 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_4_bw_4_is_8_os_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(ukernel);
}
5128 
// Block of height 68 and width 76 with input_element_stride set to 15.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_68_bw_76_ies_15) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(76).output_stride(68)
      .block_width(76).block_height(68)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(ukernel);
}
5140 
// Block of height 12 and width 20 with output_element_stride set to 15.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_12_bw_20_oes_15) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(20).output_stride(12)
      .block_width(20).block_height(12)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(ukernel);
}
5152 
// Block of height 28 and width 92 with element strides of 21 (input) and
// 17 (output); input stride 97, output stride 34.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_WASMSIMD_4, bh_28_bw_92_ies_21_oes_17) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(97).output_stride(34)
      .block_width(92).block_height(28)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(ukernel);
}
5165 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5166 
5167 
5168 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Baseline case: one 4x4 block with input and output strides both set to 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_4_bw_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(ukernel);
}
5179 
// Sweeps every block height and width from 1 through 8; the input stride is
// three times the width and the output stride is seven times the height.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_1_8_bw_1_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd;
  for (size_t height = 1; height <= 8; height++) {
    for (size_t width = 1; width <= 8; width++) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(ukernel);
    }
  }
}
5194 
// Block of height 4 and width 8; input stride 8, output stride 4.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_4_bw_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(8).block_height(4)
      .element_size(4).iterations(1)
      .Test(ukernel);
}
5205 
// Block height fixed at 4 while the width runs over 5, 6 and 7 (8 is excluded
// by the loop bound); the input stride equals the width, output stride is 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_4_bw_5_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd;
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(8)
        .block_width(width).block_height(4)
        .element_size(4).iterations(1)
        .Test(ukernel);
  }
}
5218 
// Block height fixed at 8 while the width runs over 5, 6 and 7; the input
// stride equals the width and the output stride stays at 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_8_bw_5_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd;
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(8)
        .block_width(width).block_height(8)
        .element_size(4).iterations(1)
        .Test(ukernel);
  }
}
5231 
// Block of height 8 and width 4; input stride 4, output stride 16.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_8_bw_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(16)
      .block_width(4).block_height(8)
      .element_size(4).iterations(1)
      .Test(ukernel);
}
5242 
// Heights 5, 6 and 7 with the output stride equal to the height. Note that,
// despite the bw_4 suffix, the block width used is 7 (input stride 21).
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_5_8_bw_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd;
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester()
        .input_stride(21).output_stride(height)
        .block_width(7).block_height(height)
        .element_size(4).iterations(1)
        .Test(ukernel);
  }
}
5255 
// Heights 5, 6 and 7 at a fixed block width of 8; the output stride equals
// the height and the input stride is 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_5_8_bw_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd;
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester()
        .input_stride(8).output_stride(height)
        .block_width(8).block_height(height)
        .element_size(4).iterations(1)
        .Test(ukernel);
  }
}
5268 
// All height/width pairs drawn from {5, 6, 7}; each stride is set equal to
// the matching block dimension (input stride = width, output stride = height).
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_5_8_bw_5_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd;
  for (size_t height = 5; height < 8; height++) {
    for (size_t width = 5; width < 8; width++) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(ukernel);
    }
  }
}
5283 
// 4x4 block read with input stride 8 and written with output stride 4.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_4_bw_4_is_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(ukernel);
}
5294 
// 4x4 block read with input stride 4 and written with output stride 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_4_bw_4_os_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(ukernel);
}
5305 
// 4x4 block with both the input stride and the output stride set to 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_4_bw_4_is_8_os_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(ukernel);
}
5316 
// Block of height 68 and width 76 with input_element_stride set to 15.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_68_bw_76_ies_15) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(76).output_stride(68)
      .block_width(76).block_height(68)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1)
      .Test(ukernel);
}
5328 
// Block of height 12 and width 20 with output_element_stride set to 15.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_12_bw_20_oes_15) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(20).output_stride(12)
      .block_width(20).block_height(12)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1)
      .Test(ukernel);
}
5340 
// Block of height 28 and width 92 with element strides of 21 (input) and
// 17 (output); input stride 97, output stride 34.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_WASMSIMD_4, bh_28_bw_92_ies_21_oes_17) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(97).output_stride(34)
      .block_width(92).block_height(28)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1)
      .Test(ukernel);
}
5353 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5354 
5355 
5356 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Baseline case: one 4x4 block with input and output strides both set to 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_4_bw_4) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(ukernel);
}
5367 
// Sweeps every block height and width from 1 through 8; the input stride is
// three times the width and the output stride is seven times the height.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_1_8_bw_1_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd;
  for (size_t height = 1; height <= 8; height++) {
    for (size_t width = 1; width <= 8; width++) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(ukernel);
    }
  }
}
5382 
// Block of height 4 and width 8; input stride 8, output stride 4.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_4_bw_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(8).block_height(4)
      .element_size(4).iterations(1)
      .Test(ukernel);
}
5393 
// Block height fixed at 4 while the width runs over 5, 6 and 7 (8 is excluded
// by the loop bound); the input stride equals the width, output stride is 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_4_bw_5_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd;
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(8)
        .block_width(width).block_height(4)
        .element_size(4).iterations(1)
        .Test(ukernel);
  }
}
5406 
// Block height fixed at 8 while the width runs over 5, 6 and 7; the input
// stride equals the width and the output stride stays at 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_8_bw_5_8) {
  const auto ukernel = xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd;
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(8)
        .block_width(width).block_height(8)
        .element_size(4).iterations(1)
        .Test(ukernel);
  }
}
5419 
// bh_8_bw_4: one configuration with block_width 4, block_height 8,
// input_stride 4 and output_stride 16.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_8_bw_4) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(16)
      .block_width(4).block_height(8)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd);
}
5430 
// bh_5_8_bw_4: block_height (and the matching output_stride) takes the
// values 5, 6 and 7; the upper bound 8 is exclusive.
// NOTE(review): the test name says bw_4, but block_width is 7 and
// input_stride is 21 -- confirm against the generator template.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_5_8_bw_4) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(21).output_stride(height)
        .block_width(7).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd);
  }
}
5443 
// bh_5_8_bw_8: block_width and input_stride stay 8 while block_height (and
// the matching output_stride) takes the values 5, 6 and 7; the upper bound
// 8 is exclusive.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_5_8_bw_8) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(8).output_stride(height)
        .block_width(8).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd);
  }
}
5456 
// bh_5_8_bw_5_8: sweeps block_height and block_width independently over
// 5, 6 and 7 (upper bound 8 is exclusive). input_stride equals the width and
// output_stride equals the height.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_5_8_bw_5_8) {
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd);
    }
  }
}
5471 
// bh_4_bw_4_is_8: 4x4 block whose input_stride (8) is larger than its
// block_width (4); output_stride is 4.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_4_bw_4_is_8) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd);
}
5482 
// bh_4_bw_4_os_8: 4x4 block whose output_stride (8) is larger than its
// block_height (4); input_stride is 4.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_4_bw_4_os_8) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd);
}
5493 
// bh_4_bw_4_is_8_os_8: 4x4 block with both input_stride and output_stride
// set to 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_4_bw_4_is_8_os_8) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd);
}
5504 
// bh_68_bw_76_ies_15: block_width 76 by block_height 68 with
// input_element_stride set to 15 while element_size is 4.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_68_bw_76_ies_15) {
  TransposeMicrokernelTester()
      .input_stride(76).output_stride(68)
      .block_width(76).block_height(68)
      .element_size(4).input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd);
}
5516 
// bh_12_bw_20_oes_15: block_width 20 by block_height 12 with
// output_element_stride set to 15 while element_size is 4.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_12_bw_20_oes_15) {
  TransposeMicrokernelTester()
      .input_stride(20).output_stride(12)
      .block_width(20).block_height(12)
      .element_size(4).output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd);
}
5528 
// bh_28_bw_92_ies_21_oes_17: block_width 92 by block_height 28 with
// input_stride 97 and output_stride 34; input_element_stride is 21 and
// output_element_stride is 17 while element_size is 4.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_WASMSIMD_4, bh_28_bw_92_ies_21_oes_17) {
  TransposeMicrokernelTester()
      .input_stride(97).output_stride(34)
      .block_width(92).block_height(28)
      .element_size(4).input_element_stride(21).output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd);
}
5541 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5542 
5543 
5544 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// bh_4_bw_4: one 4x4 block with input_stride 8 and output_stride 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_4_bw_4) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
}
5555 
// bh_1_8_bw_1_8: sweeps every block_height/block_width pair in [1, 8]
// (inclusive bounds). input_stride is 3x the width and output_stride is
// 7x the height.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_1_8_bw_1_8) {
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
    }
  }
}
5570 
// bh_4_bw_8: one configuration with block_width 8, block_height 4,
// input_stride 8 and output_stride 4.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_4_bw_8) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(8).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
}
5581 
// bh_4_bw_5_8: block_height stays 4 while block_width (and the matching
// input_stride) takes the values 5, 6 and 7; the upper bound 8 is exclusive.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_4_bw_5_8) {
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(8)
        .block_width(width).block_height(4)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
  }
}
5594 
// bh_8_bw_5_8: block_height stays 8 while block_width (and the matching
// input_stride) takes the values 5, 6 and 7; the upper bound 8 is exclusive.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_8_bw_5_8) {
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(8)
        .block_width(width).block_height(8)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
  }
}
5607 
// bh_8_bw_4: one configuration with block_width 4, block_height 8,
// input_stride 4 and output_stride 16.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_8_bw_4) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(16)
      .block_width(4).block_height(8)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
}
5618 
// bh_5_8_bw_4: block_height (and the matching output_stride) takes the
// values 5, 6 and 7; the upper bound 8 is exclusive.
// NOTE(review): the test name says bw_4, but block_width is 7 and
// input_stride is 21 -- confirm against the generator template.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_5_8_bw_4) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(21).output_stride(height)
        .block_width(7).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
  }
}
5631 
// bh_5_8_bw_8: block_width and input_stride stay 8 while block_height (and
// the matching output_stride) takes the values 5, 6 and 7; the upper bound
// 8 is exclusive.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_5_8_bw_8) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(8).output_stride(height)
        .block_width(8).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
  }
}
5644 
// bh_5_8_bw_5_8: sweeps block_height and block_width independently over
// 5, 6 and 7 (upper bound 8 is exclusive). input_stride equals the width and
// output_stride equals the height.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_5_8_bw_5_8) {
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
    }
  }
}
5659 
// bh_4_bw_4_is_8: 4x4 block whose input_stride (8) is larger than its
// block_width (4); output_stride is 4.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_4_bw_4_is_8) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
}
5670 
// bh_4_bw_4_os_8: 4x4 block whose output_stride (8) is larger than its
// block_height (4); input_stride is 4.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_4_bw_4_os_8) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
}
5681 
// bh_4_bw_4_is_8_os_8: 4x4 block with both input_stride and output_stride
// set to 8.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_4_bw_4_is_8_os_8) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
}
5692 
// bh_68_bw_76_ies_15: block_width 76 by block_height 68 with
// input_element_stride set to 15 while element_size is 4.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_68_bw_76_ies_15) {
  TransposeMicrokernelTester()
      .input_stride(76).output_stride(68)
      .block_width(76).block_height(68)
      .element_size(4).input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
}
5704 
// bh_12_bw_20_oes_15: block_width 20 by block_height 12 with
// output_element_stride set to 15 while element_size is 4.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_12_bw_20_oes_15) {
  TransposeMicrokernelTester()
      .input_stride(20).output_stride(12)
      .block_width(20).block_height(12)
      .element_size(4).output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
}
5716 
// bh_28_bw_92_ies_21_oes_17: block_width 92 by block_height 28 with
// input_stride 97 and output_stride 34; input_element_stride is 21 and
// output_element_stride is 17 while element_size is 4.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_WASMSIMD_4, bh_28_bw_92_ies_21_oes_17) {
  TransposeMicrokernelTester()
      .input_stride(97).output_stride(34)
      .block_width(92).block_height(28)
      .element_size(4).input_element_stride(21).output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd);
}
5729 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5730 
5731 
5732 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// bh_2_bw_2: one 2x2 block with input_stride 4 and output_stride 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_2_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
}
5744 
// bh_1_4_bw_1_4: sweeps every block_height/block_width pair in [1, 4]
// (inclusive bounds). input_stride is 3x the width and output_stride is
// 7x the height.
TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_1_4_bw_1_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
    }
  }
}
5760 
// bh_2_bw_4: one configuration with block_width 4, block_height 2,
// input_stride 4 and output_stride 2.
TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_2_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(2)
      .block_width(4).block_height(2)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
}
5772 
// bh_2_bw_3_4: block_height stays 2 while block_width and input_stride
// follow the loop variable. The upper bound 4 is exclusive, so the loop body
// runs once, with width 3.
TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_2_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(4)
        .block_width(width).block_height(2)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
  }
}
5786 
// bh_4_bw_3_4: block_height stays 4 while block_width and input_stride
// follow the loop variable. The upper bound 4 is exclusive, so the loop body
// runs once, with width 3.
TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(4)
        .block_width(width).block_height(4)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
  }
}
5800 
// bh_4_bw_2: one configuration with block_width 2, block_height 4,
// input_stride 2 and output_stride 10.
TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(10)
      .block_width(2).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
}
5812 
// bh_3_4_bw_2: block_height and output_stride follow the loop variable. The
// upper bound 4 is exclusive, so the loop body runs once, with height 3.
// NOTE(review): the test name says bw_2, but block_width is 5 and
// input_stride is 19 -- confirm against the generator template.
TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_3_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(19).output_stride(height)
        .block_width(5).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
  }
}
5826 
// bh_3_4_bw_4: block_width and input_stride stay 4 while block_height and
// output_stride follow the loop variable. The upper bound 4 is exclusive, so
// the loop body runs once, with height 3.
TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_3_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(4).output_stride(height)
        .block_width(4).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
  }
}
5840 
// bh_3_4_bw_3_4: nested loops over block_height and block_width. Both upper
// bounds (4) are exclusive, so the only combination run is 3x3, with
// input_stride equal to the width and output_stride equal to the height.
TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_3_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
    }
  }
}
5856 
// bh_2_bw_2_is_4: 2x2 block whose input_stride (4) is larger than its
// block_width (2); output_stride is 2.
TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_2_bw_2_is_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(2)
      .block_width(2).block_height(2)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
}
5868 
// bh_2_bw_2_os_4: 2x2 block whose output_stride (4) is larger than its
// block_height (2); input_stride is 2.
TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_2_bw_2_os_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
}
5880 
// bh_2_bw_2_is_4_os_4: 2x2 block with both input_stride and output_stride
// set to 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
}
5892 
// bh_34_bw_38_ies_15: block_width 38 by block_height 34 with
// input_element_stride set to 15 while element_size is 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_34_bw_38_ies_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(38).output_stride(34)
      .block_width(38).block_height(34)
      .element_size(4).input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
}
5905 
// bh_6_bw_10_oes_15: block_width 10 by block_height 6 with
// output_element_stride set to 15 while element_size is 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_6_bw_10_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(10).output_stride(6)
      .block_width(10).block_height(6)
      .element_size(4).output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
}
5918 
// bh_14_bw_46_ies_21_oes_17: block_width 46 by block_height 14 with
// input_stride 51 and output_stride 20; input_element_stride is 21 and
// output_element_stride is 17 while element_size is 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_DEC_ZIP_NEON_4, bh_14_bw_46_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(51).output_stride(20)
      .block_width(46).block_height(14)
      .element_size(4).input_element_stride(21).output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon);
}
5932 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
5933 
5934 
5935 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// bh_2_bw_2: one 2x2 block with input_stride 4 and output_stride 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_2_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
}
5947 
// bh_1_4_bw_1_4: sweeps every block_height/block_width pair in [1, 4]
// (inclusive bounds). input_stride is 3x the width and output_stride is
// 7x the height.
TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_1_4_bw_1_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
    }
  }
}
5963 
// bh_2_bw_4: one configuration with block_width 4, block_height 2,
// input_stride 4 and output_stride 2.
TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_2_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(2)
      .block_width(4).block_height(2)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
}
5975 
// bh_2_bw_3_4: block_height stays 2 while block_width and input_stride
// follow the loop variable. The upper bound 4 is exclusive, so the loop body
// runs once, with width 3.
TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_2_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(4)
        .block_width(width).block_height(2)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
  }
}
5989 
// bh_4_bw_3_4: block_height stays 4 while block_width and input_stride
// follow the loop variable. The upper bound 4 is exclusive, so the loop body
// runs once, with width 3.
TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(4)
        .block_width(width).block_height(4)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
  }
}
6003 
// bh_4_bw_2: one configuration with block_width 2, block_height 4,
// input_stride 2 and output_stride 10.
TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(10)
      .block_width(2).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
}
6015 
// bh_3_4_bw_2: block_height and output_stride follow the loop variable. The
// upper bound 4 is exclusive, so the loop body runs once, with height 3.
// NOTE(review): the test name says bw_2, but block_width is 5 and
// input_stride is 19 -- confirm against the generator template.
TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_3_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(19).output_stride(height)
        .block_width(5).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
  }
}
6029 
// bh_3_4_bw_4: block_width and input_stride stay 4 while block_height and
// output_stride follow the loop variable. The upper bound 4 is exclusive, so
// the loop body runs once, with height 3.
TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_3_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(4).output_stride(height)
        .block_width(4).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
  }
}
6043 
// bh_3_4_bw_3_4: nested loops over block_height and block_width. Both upper
// bounds (4) are exclusive, so the only combination run is 3x3, with
// input_stride equal to the width and output_stride equal to the height.
TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_3_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
    }
  }
}
6059 
// bh_2_bw_2_is_4: 2x2 block whose input_stride (4) is larger than its
// block_width (2); output_stride is 2.
TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_2_bw_2_is_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(2)
      .block_width(2).block_height(2)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
}
6071 
// bh_2_bw_2_os_4: 2x2 block whose output_stride (4) is larger than its
// block_height (2); input_stride is 2.
TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_2_bw_2_os_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
}
6083 
// bh_2_bw_2_is_4_os_4: 2x2 block with both input_stride and output_stride
// set to 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
}
6095 
// bh_34_bw_38_ies_15: block_width 38 by block_height 34 with
// input_element_stride set to 15 while element_size is 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_34_bw_38_ies_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(38).output_stride(34)
      .block_width(38).block_height(34)
      .element_size(4).input_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
}
6108 
// bh_6_bw_10_oes_15: block_width 10 by block_height 6 with
// output_element_stride set to 15 while element_size is 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_6_bw_10_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(10).output_stride(6)
      .block_width(10).block_height(6)
      .element_size(4).output_element_stride(15)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
}
6121 
// bh_14_bw_46_ies_21_oes_17: block_width 46 by block_height 14 with
// input_stride 51 and output_stride 20; input_element_stride is 21 and
// output_element_stride is 17 while element_size is 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_MOV_ZIP_NEON_4, bh_14_bw_46_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(51).output_stride(20)
      .block_width(46).block_height(14)
      .element_size(4).input_element_stride(21).output_element_stride(17)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon);
}
6135 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
6136 
6137 
6138 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// bh_2_bw_2: one 2x2 block with input_stride 4 and output_stride 4.
TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_2_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
}
6150 
// bh_1_4_bw_1_4: sweeps every block_height/block_width pair in [1, 4]
// (inclusive bounds). input_stride is 3x the width and output_stride is
// 7x the height.
TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_1_4_bw_1_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
    }
  }
}
6166 
// bh_2_bw_4: one configuration with block_width 4, block_height 2,
// input_stride 4 and output_stride 2.
TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_2_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(2)
      .block_width(4).block_height(2)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
}
6178 
// bh_2_bw_3_4: block_height stays 2 while block_width and input_stride
// follow the loop variable. The upper bound 4 is exclusive, so the loop body
// runs once, with width 3.
TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_2_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(4)
        .block_width(width).block_height(2)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
  }
}
6192 
// bh_4_bw_3_4: block_height stays 4 while block_width and input_stride
// follow the loop variable. The upper bound 4 is exclusive, so the loop body
// runs once, with width 3.
TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(4)
        .block_width(width).block_height(4)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
  }
}
6206 
// bh_4_bw_2: one configuration with block_width 2, block_height 4,
// input_stride 2 and output_stride 10.
TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(10)
      .block_width(2).block_height(4)
      .element_size(4).iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
}
6218 
// bh_3_4_bw_2: block_height and output_stride follow the loop variable. The
// upper bound 4 is exclusive, so the loop body runs once, with height 3.
// NOTE(review): the test name says bw_2, but block_width is 5 and
// input_stride is 19 -- confirm against the generator template.
TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_3_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(19).output_stride(height)
        .block_width(5).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
  }
}
6232 
// bh_3_4_bw_4: block_width and input_stride stay 4 while block_height and
// output_stride follow the loop variable. The upper bound 4 is exclusive, so
// the loop body runs once, with height 3.
TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_3_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(4).output_stride(height)
        .block_width(4).block_height(height)
        .element_size(4).iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
  }
}
6246 
// bh_3_4_bw_3_4: nested loops over block_height and block_width. Both upper
// bounds (4) are exclusive, so the only combination run is 3x3, with
// input_stride equal to the width and output_stride equal to the height.
TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_3_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(4).iterations(1)
          .Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
    }
  }
}
6262 
// 2-wide, 2-high block where input_stride (4) is larger than the block width
// and output_stride is 2 (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_2_bw_2_is_4) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(4);
  tester.output_stride(2);
  tester.block_width(2);
  tester.block_height(2);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
}
6274 
// 2-wide, 2-high block where output_stride (4) is larger than the block height
// and input_stride is 2 (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_2_bw_2_os_4) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(2);
  tester.output_stride(4);
  tester.block_width(2);
  tester.block_height(2);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
}
6286 
// 2-wide, 2-high block with both input_stride and output_stride set to 4
// (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(4);
  tester.output_stride(4);
  tester.block_width(2);
  tester.block_height(2);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
}
6298 
// Larger block (38 wide, 34 high; input_stride 38, output_stride 34) with
// input_element_stride overridden to 15 (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_34_bw_38_ies_15) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(38);
  tester.output_stride(34);
  tester.block_width(38);
  tester.block_height(34);
  tester.element_size(4);
  tester.input_element_stride(15);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
}
6311 
// 10-wide, 6-high block (input_stride 10, output_stride 6) with
// output_element_stride overridden to 15 (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_6_bw_10_oes_15) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(10);
  tester.output_stride(6);
  tester.block_width(10);
  tester.block_height(6);
  tester.element_size(4);
  tester.output_element_stride(15);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
}
6324 
// 46-wide, 14-high block with input_stride 51 and output_stride 20, overriding
// both element strides: input_element_stride 21 and output_element_stride 17
// (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_MULTI_MULTI_ZIP_NEON_4, bh_14_bw_46_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(51);
  tester.output_stride(20);
  tester.block_width(46);
  tester.block_height(14);
  tester.element_size(4);
  tester.input_element_stride(21);
  tester.output_element_stride(17);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon);
}
6338 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
6339 
6340 
6341 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon on a block that
// is 2 wide and 2 high, with input_stride and output_stride both 4
// (element_size 4, iterations 1). Gated by TEST_REQUIRES_ARM_NEON.
TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_2_bw_2) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(4);
  tester.output_stride(4);
  tester.block_width(2);
  tester.block_height(2);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
}
6353 
// Covers every block_height / block_width pair in 1..4 (inclusive). For each
// pair input_stride is three times the width and output_stride is seven times
// the height (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_1_4_bw_1_4) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width * 3);
      tester.output_stride(height * 7);
      tester.block_width(width);
      tester.block_height(height);
      tester.element_size(4);
      tester.iterations(1);
      tester.Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
    }
  }
}
6369 
// 4-wide, 2-high block with input_stride 4 and output_stride 2
// (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_2_bw_4) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(4);
  tester.output_stride(2);
  tester.block_width(4);
  tester.block_height(2);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
}
6381 
// block_height is fixed at 2 while block_width walks [3, 4); the exclusive
// bound means only width 3 runs. input_stride follows the width and
// output_stride is 4 (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_2_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width);
    tester.output_stride(4);
    tester.block_width(width);
    tester.block_height(2);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
  }
}
6395 
// block_height is fixed at 4 while block_width walks [3, 4); the exclusive
// bound means only width 3 runs. input_stride follows the width and
// output_stride is 4 (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width);
    tester.output_stride(4);
    tester.block_width(width);
    tester.block_height(4);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
  }
}
6409 
// 2-wide, 4-high block with input_stride 2 and output_stride 10
// (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(2);
  tester.output_stride(10);
  tester.block_width(2);
  tester.block_height(4);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
}
6421 
// Sweeps block_height over [3, 4) -- the bound is exclusive, so only height 3
// runs -- with output_stride equal to the height, block_width 5 and
// input_stride 19 (element_size 4, iterations 1).
// NOTE(review): the test name says bw_2 but block_width is set to 5;
// presumably inherited from the generator template -- confirm before renaming.
TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_3_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester;
    tester.input_stride(19);
    tester.output_stride(height);
    tester.block_width(5);
    tester.block_height(height);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
  }
}
6435 
// Keeps block_width and input_stride at 4 while block_height walks [3, 4);
// the exclusive bound means only height 3 is exercised. output_stride is set
// to the current height (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_3_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester;
    tester.input_stride(4);
    tester.output_stride(height);
    tester.block_width(4);
    tester.block_height(height);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
  }
}
6449 
// Nested sweep of block_height and block_width, each over [3, 4); with the
// exclusive bounds this is the single 3-high, 3-wide case. input_stride tracks
// the width and output_stride tracks the height (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_3_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width);
      tester.output_stride(height);
      tester.block_width(width);
      tester.block_height(height);
      tester.element_size(4);
      tester.iterations(1);
      tester.Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
    }
  }
}
6465 
// 2-wide, 2-high block where input_stride (4) is larger than the block width
// and output_stride is 2 (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_2_bw_2_is_4) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(4);
  tester.output_stride(2);
  tester.block_width(2);
  tester.block_height(2);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
}
6477 
// 2-wide, 2-high block where output_stride (4) is larger than the block height
// and input_stride is 2 (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_2_bw_2_os_4) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(2);
  tester.output_stride(4);
  tester.block_width(2);
  tester.block_height(2);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
}
6489 
// 2-wide, 2-high block with both input_stride and output_stride set to 4
// (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(4);
  tester.output_stride(4);
  tester.block_width(2);
  tester.block_height(2);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
}
6501 
// Larger block (38 wide, 34 high; input_stride 38, output_stride 34) with
// input_element_stride overridden to 15 (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_34_bw_38_ies_15) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(38);
  tester.output_stride(34);
  tester.block_width(38);
  tester.block_height(34);
  tester.element_size(4);
  tester.input_element_stride(15);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
}
6514 
// 10-wide, 6-high block (input_stride 10, output_stride 6) with
// output_element_stride overridden to 15 (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_6_bw_10_oes_15) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(10);
  tester.output_stride(6);
  tester.block_width(10);
  tester.block_height(6);
  tester.element_size(4);
  tester.output_element_stride(15);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
}
6527 
// 46-wide, 14-high block with input_stride 51 and output_stride 20, overriding
// both element strides: input_element_stride 21 and output_element_stride 17
// (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_MULTI_SWITCH_ZIP_NEON_4, bh_14_bw_46_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(51);
  tester.output_stride(20);
  tester.block_width(46);
  tester.block_height(14);
  tester.element_size(4);
  tester.input_element_stride(21);
  tester.output_element_stride(17);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon);
}
6541 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
6542 
6543 
6544 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon on a block that is
// 2 wide and 2 high, with input_stride and output_stride both 4
// (element_size 4, iterations 1). Gated by TEST_REQUIRES_ARM_NEON.
TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_2_bw_2) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(4);
  tester.output_stride(4);
  tester.block_width(2);
  tester.block_height(2);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
}
6556 
// Covers every block_height / block_width pair in 1..4 (inclusive). For each
// pair input_stride is three times the width and output_stride is seven times
// the height (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_1_4_bw_1_4) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width * 3);
      tester.output_stride(height * 7);
      tester.block_width(width);
      tester.block_height(height);
      tester.element_size(4);
      tester.iterations(1);
      tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
    }
  }
}
6572 
// 4-wide, 2-high block with input_stride 4 and output_stride 2
// (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_2_bw_4) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(4);
  tester.output_stride(2);
  tester.block_width(4);
  tester.block_height(2);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
}
6584 
// block_height is fixed at 2 while block_width walks [3, 4); the exclusive
// bound means only width 3 runs. input_stride follows the width and
// output_stride is 4 (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_2_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width);
    tester.output_stride(4);
    tester.block_width(width);
    tester.block_height(2);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
  }
}
6598 
// block_height is fixed at 4 while block_width walks [3, 4); the exclusive
// bound means only width 3 runs. input_stride follows the width and
// output_stride is 4 (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width);
    tester.output_stride(4);
    tester.block_width(width);
    tester.block_height(4);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
  }
}
6612 
// 2-wide, 4-high block with input_stride 2 and output_stride 10
// (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(2);
  tester.output_stride(10);
  tester.block_width(2);
  tester.block_height(4);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
}
6624 
// Sweeps block_height over [3, 4) -- the bound is exclusive, so only height 3
// runs -- with output_stride equal to the height, block_width 5 and
// input_stride 19 (element_size 4, iterations 1).
// NOTE(review): the test name says bw_2 but block_width is set to 5;
// presumably inherited from the generator template -- confirm before renaming.
TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_3_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester;
    tester.input_stride(19);
    tester.output_stride(height);
    tester.block_width(5);
    tester.block_height(height);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
  }
}
6638 
// Keeps block_width and input_stride at 4 while block_height walks [3, 4);
// the exclusive bound means only height 3 is exercised. output_stride is set
// to the current height (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_3_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester;
    tester.input_stride(4);
    tester.output_stride(height);
    tester.block_width(4);
    tester.block_height(height);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
  }
}
6652 
// Nested sweep of block_height and block_width, each over [3, 4); with the
// exclusive bounds this is the single 3-high, 3-wide case. input_stride tracks
// the width and output_stride tracks the height (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_3_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width);
      tester.output_stride(height);
      tester.block_width(width);
      tester.block_height(height);
      tester.element_size(4);
      tester.iterations(1);
      tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
    }
  }
}
6668 
// 2-wide, 2-high block where input_stride (4) is larger than the block width
// and output_stride is 2 (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_2_bw_2_is_4) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(4);
  tester.output_stride(2);
  tester.block_width(2);
  tester.block_height(2);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
}
6680 
// 2-wide, 2-high block where output_stride (4) is larger than the block height
// and input_stride is 2 (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_2_bw_2_os_4) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(2);
  tester.output_stride(4);
  tester.block_width(2);
  tester.block_height(2);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
}
6692 
// 2-wide, 2-high block with both input_stride and output_stride set to 4
// (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(4);
  tester.output_stride(4);
  tester.block_width(2);
  tester.block_height(2);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
}
6704 
// Larger block (38 wide, 34 high; input_stride 38, output_stride 34) with
// input_element_stride overridden to 15 (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_34_bw_38_ies_15) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(38);
  tester.output_stride(34);
  tester.block_width(38);
  tester.block_height(34);
  tester.element_size(4);
  tester.input_element_stride(15);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
}
6717 
// 10-wide, 6-high block (input_stride 10, output_stride 6) with
// output_element_stride overridden to 15 (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_6_bw_10_oes_15) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(10);
  tester.output_stride(6);
  tester.block_width(10);
  tester.block_height(6);
  tester.element_size(4);
  tester.output_element_stride(15);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
}
6730 
// 46-wide, 14-high block with input_stride 51 and output_stride 20, overriding
// both element strides: input_element_stride 21 and output_element_stride 17
// (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_DEC_ZIP_NEON_4, bh_14_bw_46_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(51);
  tester.output_stride(20);
  tester.block_width(46);
  tester.block_height(14);
  tester.element_size(4);
  tester.input_element_stride(21);
  tester.output_element_stride(17);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon);
}
6744 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
6745 
6746 
6747 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon on a block that is
// 2 wide and 2 high, with input_stride and output_stride both 4
// (element_size 4, iterations 1). Gated by TEST_REQUIRES_ARM_NEON.
TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_2_bw_2) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(4);
  tester.output_stride(4);
  tester.block_width(2);
  tester.block_height(2);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
}
6759 
// Covers every block_height / block_width pair in 1..4 (inclusive). For each
// pair input_stride is three times the width and output_stride is seven times
// the height (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_1_4_bw_1_4) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width * 3);
      tester.output_stride(height * 7);
      tester.block_width(width);
      tester.block_height(height);
      tester.element_size(4);
      tester.iterations(1);
      tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
    }
  }
}
6775 
// 4-wide, 2-high block with input_stride 4 and output_stride 2
// (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_2_bw_4) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(4);
  tester.output_stride(2);
  tester.block_width(4);
  tester.block_height(2);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
}
6787 
// block_height is fixed at 2 while block_width walks [3, 4); the exclusive
// bound means only width 3 runs. input_stride follows the width and
// output_stride is 4 (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_2_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width);
    tester.output_stride(4);
    tester.block_width(width);
    tester.block_height(2);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
  }
}
6801 
// block_height is fixed at 4 while block_width walks [3, 4); the exclusive
// bound means only width 3 runs. input_stride follows the width and
// output_stride is 4 (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width);
    tester.output_stride(4);
    tester.block_width(width);
    tester.block_height(4);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
  }
}
6815 
// 2-wide, 4-high block with input_stride 2 and output_stride 10
// (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(2);
  tester.output_stride(10);
  tester.block_width(2);
  tester.block_height(4);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
}
6827 
// Sweeps block_height over [3, 4) -- the bound is exclusive, so only height 3
// runs -- with output_stride equal to the height, block_width 5 and
// input_stride 19 (element_size 4, iterations 1).
// NOTE(review): the test name says bw_2 but block_width is set to 5;
// presumably inherited from the generator template -- confirm before renaming.
TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_3_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester;
    tester.input_stride(19);
    tester.output_stride(height);
    tester.block_width(5);
    tester.block_height(height);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
  }
}
6841 
// Keeps block_width and input_stride at 4 while block_height walks [3, 4);
// the exclusive bound means only height 3 is exercised. output_stride is set
// to the current height (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_3_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester;
    tester.input_stride(4);
    tester.output_stride(height);
    tester.block_width(4);
    tester.block_height(height);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
  }
}
6855 
// Nested sweep of block_height and block_width, each over [3, 4); with the
// exclusive bounds this is the single 3-high, 3-wide case. input_stride tracks
// the width and output_stride tracks the height (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_3_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width);
      tester.output_stride(height);
      tester.block_width(width);
      tester.block_height(height);
      tester.element_size(4);
      tester.iterations(1);
      tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
    }
  }
}
6871 
// 2-wide, 2-high block where input_stride (4) is larger than the block width
// and output_stride is 2 (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_2_bw_2_is_4) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(4);
  tester.output_stride(2);
  tester.block_width(2);
  tester.block_height(2);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
}
6883 
// 2-wide, 2-high block where output_stride (4) is larger than the block height
// and input_stride is 2 (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_2_bw_2_os_4) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(2);
  tester.output_stride(4);
  tester.block_width(2);
  tester.block_height(2);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
}
6895 
// 2-wide, 2-high block with both input_stride and output_stride set to 4
// (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(4);
  tester.output_stride(4);
  tester.block_width(2);
  tester.block_height(2);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
}
6907 
// Larger block (38 wide, 34 high; input_stride 38, output_stride 34) with
// input_element_stride overridden to 15 (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_34_bw_38_ies_15) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(38);
  tester.output_stride(34);
  tester.block_width(38);
  tester.block_height(34);
  tester.element_size(4);
  tester.input_element_stride(15);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
}
6920 
// 10-wide, 6-high block (input_stride 10, output_stride 6) with
// output_element_stride overridden to 15 (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_6_bw_10_oes_15) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(10);
  tester.output_stride(6);
  tester.block_width(10);
  tester.block_height(6);
  tester.element_size(4);
  tester.output_element_stride(15);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
}
6933 
// 46-wide, 14-high block with input_stride 51 and output_stride 20, overriding
// both element strides: input_element_stride 21 and output_element_stride 17
// (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_MOV_ZIP_NEON_4, bh_14_bw_46_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(51);
  tester.output_stride(20);
  tester.block_width(46);
  tester.block_height(14);
  tester.element_size(4);
  tester.input_element_stride(21);
  tester.output_element_stride(17);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon);
}
6947 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
6948 
6949 
6950 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon on a block that is
// 2 wide and 2 high, with input_stride and output_stride both 4
// (element_size 4, iterations 1). Gated by TEST_REQUIRES_ARM_NEON.
TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_2_bw_2) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(4);
  tester.output_stride(4);
  tester.block_width(2);
  tester.block_height(2);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
}
6962 
// Covers every block_height / block_width pair in 1..4 (inclusive). For each
// pair input_stride is three times the width and output_stride is seven times
// the height (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_1_4_bw_1_4) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width * 3);
      tester.output_stride(height * 7);
      tester.block_width(width);
      tester.block_height(height);
      tester.element_size(4);
      tester.iterations(1);
      tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
    }
  }
}
6978 
// 4-wide, 2-high block with input_stride 4 and output_stride 2
// (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_2_bw_4) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(4);
  tester.output_stride(2);
  tester.block_width(4);
  tester.block_height(2);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
}
6990 
// block_height is fixed at 2 while block_width walks [3, 4); the exclusive
// bound means only width 3 runs. input_stride follows the width and
// output_stride is 4 (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_2_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width);
    tester.output_stride(4);
    tester.block_width(width);
    tester.block_height(2);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
  }
}
7004 
// block_height is fixed at 4 while block_width walks [3, 4); the exclusive
// bound means only width 3 runs. input_stride follows the width and
// output_stride is 4 (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width);
    tester.output_stride(4);
    tester.block_width(width);
    tester.block_height(4);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
  }
}
7018 
// 2-wide, 4-high block with input_stride 2 and output_stride 10
// (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(2);
  tester.output_stride(10);
  tester.block_width(2);
  tester.block_height(4);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
}
7030 
// Sweeps block_height over [3, 4) -- the bound is exclusive, so only height 3
// runs -- with output_stride equal to the height, block_width 5 and
// input_stride 19 (element_size 4, iterations 1).
// NOTE(review): the test name says bw_2 but block_width is set to 5;
// presumably inherited from the generator template -- confirm before renaming.
TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_3_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester;
    tester.input_stride(19);
    tester.output_stride(height);
    tester.block_width(5);
    tester.block_height(height);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
  }
}
7044 
// Keeps block_width and input_stride at 4 while block_height walks [3, 4);
// the exclusive bound means only height 3 is exercised. output_stride is set
// to the current height (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_3_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester;
    tester.input_stride(4);
    tester.output_stride(height);
    tester.block_width(4);
    tester.block_height(height);
    tester.element_size(4);
    tester.iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
  }
}
7058 
// Nested sweep of block_height and block_width, each over [3, 4); with the
// exclusive bounds this is the single 3-high, 3-wide case. input_stride tracks
// the width and output_stride tracks the height (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_3_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;

  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width);
      tester.output_stride(height);
      tester.block_width(width);
      tester.block_height(height);
      tester.element_size(4);
      tester.iterations(1);
      tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
    }
  }
}
7074 
// 2-wide, 2-high block where input_stride (4) is larger than the block width
// and output_stride is 2 (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_2_bw_2_is_4) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(4);
  tester.output_stride(2);
  tester.block_width(2);
  tester.block_height(2);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
}
7086 
// 2-wide, 2-high block where output_stride (4) is larger than the block height
// and input_stride is 2 (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_2_bw_2_os_4) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(2);
  tester.output_stride(4);
  tester.block_width(2);
  tester.block_height(2);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
}
7098 
// 2-wide, 2-high block with both input_stride and output_stride set to 4
// (element_size 4, iterations 1).
TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_ARM_NEON;

  TransposeMicrokernelTester tester;
  tester.input_stride(4);
  tester.output_stride(4);
  tester.block_width(2);
  tester.block_height(2);
  tester.element_size(4);
  tester.iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
}
7110 
// block_height 34, block_width 38, with input_element_stride 15 (greater than
// element_size 4); the output element stride is left at the tester default.
TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_34_bw_38_ies_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(38)
    .output_stride(34)
    .block_width(38)
    .block_height(34)
    .element_size(4)
    .input_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
}
7123 
// block_height 6, block_width 10, with output_element_stride 15 (greater than
// element_size 4); the input element stride is left at the tester default.
TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_6_bw_10_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(10)
    .output_stride(6)
    .block_width(10)
    .block_height(6)
    .element_size(4)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
}
7136 
// block_height 14, block_width 46, with input stride 51 and output stride 20
// (both larger than the block), input_element_stride 21 and
// output_element_stride 17.
TEST(X32_TRANSPOSEC__2X2_REUSE_MULTI_ZIP_NEON_4, bh_14_bw_46_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(51)
    .output_stride(20)
    .block_width(46)
    .block_height(14)
    .element_size(4)
    .input_element_stride(21)
    .output_element_stride(17)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon);
}
7150 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
7151 
7152 
7153 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 2x2 block with input and output stride 4.
TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_2_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(4)
    .block_width(2)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
}
7165 
// Sweeps every block_height and block_width in [1, 4] (inclusive), using an
// input stride of 3 * width and an output stride of 7 * height.
TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_1_4_bw_1_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width * 3)
        .output_stride(height * 7)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
    }
  }
}
7181 
// block_height 2, block_width 4, with strides equal to the block dimensions.
TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_2_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(2)
    .block_width(4)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
}
7193 
// block_height 2 with block_width swept over the half-open range [3, 4), i.e.
// the single value 3. Output stride is fixed at 4.
TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_2_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(4)
      .block_width(width)
      .block_height(2)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
  }
}
7207 
// block_height 4 with block_width swept over the half-open range [3, 4), i.e.
// the single value 3. Output stride is fixed at 4.
TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(4)
      .block_width(width)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
  }
}
7221 
// block_height 4, block_width 2, with input stride 2 and output stride 10
// (larger than block_height).
TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(10)
    .block_width(2)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
}
7233 
// block_height swept over the half-open range [3, 4), i.e. the single value 3.
// NOTE: despite "bw_2" in the test name, block_width is fixed at 5 and the
// input stride at 19.
TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_3_4_bw_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
      .input_stride(19)
      .output_stride(height)
      .block_width(5)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
  }
}
7247 
// block_width 4 with block_height swept over the half-open range [3, 4), i.e.
// the single value 3. Output stride tracks block_height.
TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_3_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(height)
      .block_width(4)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
  }
}
7261 
// Sweeps block_height and block_width over the half-open range [3, 4), i.e.
// the single value 3, with each stride equal to the matching block dimension.
TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_3_4_bw_3_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
    }
  }
}
7277 
// Single 2x2 block with input stride 4 (larger than block_width) and output
// stride 2.
TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_2_bw_2_is_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(2)
    .block_width(2)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
}
7289 
// Single 2x2 block with input stride 2 and output stride 4 (larger than
// block_height).
TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_2_bw_2_os_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(4)
    .block_width(2)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
}
7301 
// Single 2x2 block with both input and output stride set to 4.
TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(4)
    .block_width(2)
    .block_height(2)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
}
7313 
// block_height 34, block_width 38, with input_element_stride 15 (greater than
// element_size 4); the output element stride is left at the tester default.
TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_34_bw_38_ies_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(38)
    .output_stride(34)
    .block_width(38)
    .block_height(34)
    .element_size(4)
    .input_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
}
7326 
// block_height 6, block_width 10, with output_element_stride 15 (greater than
// element_size 4); the input element stride is left at the tester default.
TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_6_bw_10_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(10)
    .output_stride(6)
    .block_width(10)
    .block_height(6)
    .element_size(4)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
}
7339 
// block_height 14, block_width 46, with input stride 51 and output stride 20
// (both larger than the block), input_element_stride 21 and
// output_element_stride 17.
TEST(X32_TRANSPOSEC__2X2_REUSE_SWITCH_ZIP_NEON_4, bh_14_bw_46_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(51)
    .output_stride(20)
    .block_width(46)
    .block_height(14)
    .element_size(4)
    .input_element_stride(21)
    .output_element_stride(17)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon);
}
7353 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
7354 
7355 
7356 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 4x4 block with input and output stride 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
}
7368 
// Sweeps every block_height and block_width in [1, 8] (inclusive), using an
// input stride of 3 * width and an output stride of 7 * height.
TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width * 3)
        .output_stride(height * 7)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
    }
  }
}
7384 
// block_height 4, block_width 8, with strides equal to the block dimensions.
TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(4)
    .block_width(8)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
}
7396 
// block_height 4 with block_width swept over the half-open range [5, 8), i.e.
// 5, 6 and 7. Output stride is fixed at 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(8)
      .block_width(width)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
  }
}
7410 
// block_height 8 with block_width swept over the half-open range [5, 8), i.e.
// 5, 6 and 7. Output stride is fixed at 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(8)
      .block_width(width)
      .block_height(8)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
  }
}
7424 
// block_height 8, block_width 4, with input stride 4 and output stride 16
// (larger than block_height).
TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(16)
    .block_width(4)
    .block_height(8)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
}
7436 
// block_height swept over the half-open range [5, 8), i.e. 5, 6 and 7.
// NOTE: despite "bw_4" in the test name, block_width is fixed at 7 and the
// input stride at 21.
TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(21)
      .output_stride(height)
      .block_width(7)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
  }
}
7450 
// block_width 8 with block_height swept over the half-open range [5, 8), i.e.
// 5, 6 and 7. Output stride tracks block_height.
TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(height)
      .block_width(8)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
  }
}
7464 
// Sweeps block_height and block_width over the half-open range [5, 8), i.e.
// 5, 6 and 7, with each stride equal to the matching block dimension.
TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
    }
  }
}
7480 
// Single 4x4 block with input stride 8 (larger than block_width) and output
// stride 4.
TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(4)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
}
7492 
// Single 4x4 block with input stride 4 and output stride 8 (larger than
// block_height).
TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
}
7504 
// Single 4x4 block with both input and output stride set to 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
}
7516 
// block_height 68, block_width 76, with input_element_stride 15 (greater than
// element_size 4); the output element stride is left at the tester default.
TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(76)
    .output_stride(68)
    .block_width(76)
    .block_height(68)
    .element_size(4)
    .input_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
}
7529 
// block_height 12, block_width 20, with output_element_stride 15 (greater
// than element_size 4); the input element stride is left at the tester
// default.
TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(20)
    .output_stride(12)
    .block_width(20)
    .block_height(12)
    .element_size(4)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
}
7542 
// block_height 28, block_width 92, with input stride 97 and output stride 34
// (both larger than the block), input_element_stride 21 and
// output_element_stride 17.
TEST(X32_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(97)
    .output_stride(34)
    .block_width(92)
    .block_height(28)
    .element_size(4)
    .input_element_stride(21)
    .output_element_stride(17)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon);
}
7556 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
7557 
7558 
7559 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 4x4 block with input and output stride 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
}
7571 
// Sweeps every block_height and block_width in [1, 8] (inclusive), using an
// input stride of 3 * width and an output stride of 7 * height.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width * 3)
        .output_stride(height * 7)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
    }
  }
}
7587 
// block_height 4, block_width 8, with strides equal to the block dimensions.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(4)
    .block_width(8)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
}
7599 
// block_height 4 with block_width swept over the half-open range [5, 8), i.e.
// 5, 6 and 7. Output stride is fixed at 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(8)
      .block_width(width)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
  }
}
7613 
// block_height 8 with block_width swept over the half-open range [5, 8), i.e.
// 5, 6 and 7. Output stride is fixed at 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(8)
      .block_width(width)
      .block_height(8)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
  }
}
7627 
// block_height 8, block_width 4, with input stride 4 and output stride 16
// (larger than block_height).
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(16)
    .block_width(4)
    .block_height(8)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
}
7639 
// block_height swept over the half-open range [5, 8), i.e. 5, 6 and 7.
// NOTE: despite "bw_4" in the test name, block_width is fixed at 7 and the
// input stride at 21.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(21)
      .output_stride(height)
      .block_width(7)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
  }
}
7653 
// block_width 8 with block_height swept over the half-open range [5, 8), i.e.
// 5, 6 and 7. Output stride tracks block_height.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(height)
      .block_width(8)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
  }
}
7667 
// Sweeps block_height and block_width over the half-open range [5, 8), i.e.
// 5, 6 and 7, with each stride equal to the matching block dimension.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
    }
  }
}
7683 
// Single 4x4 block with input stride 8 (larger than block_width) and output
// stride 4.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(4)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
}
7695 
// Single 4x4 block with input stride 4 and output stride 8 (larger than
// block_height).
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
}
7707 
// Single 4x4 block with both input and output stride set to 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
}
7719 
// block_height 68, block_width 76, with input_element_stride 15 (greater than
// element_size 4); the output element stride is left at the tester default.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(76)
    .output_stride(68)
    .block_width(76)
    .block_height(68)
    .element_size(4)
    .input_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
}
7732 
// block_height 12, block_width 20, with output_element_stride 15 (greater
// than element_size 4); the input element stride is left at the tester
// default.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(20)
    .output_stride(12)
    .block_width(20)
    .block_height(12)
    .element_size(4)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
}
7745 
// block_height 28, block_width 92, with input stride 97 and output stride 34
// (both larger than the block), input_element_stride 21 and
// output_element_stride 17.
TEST(X32_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(97)
    .output_stride(34)
    .block_width(92)
    .block_height(28)
    .element_size(4)
    .input_element_stride(21)
    .output_element_stride(17)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon);
}
7759 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
7760 
7761 
7762 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 4x4 block with input and output stride 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
}
7774 
// Sweeps every block_height and block_width in [1, 8] (inclusive), using an
// input stride of 3 * width and an output stride of 7 * height.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width * 3)
        .output_stride(height * 7)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
    }
  }
}
7790 
// block_height 4, block_width 8, with strides equal to the block dimensions.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(4)
    .block_width(8)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
}
7802 
// block_height 4 with block_width swept over the half-open range [5, 8), i.e.
// 5, 6 and 7. Output stride is fixed at 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(8)
      .block_width(width)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
  }
}
7816 
// block_height 8 with block_width swept over the half-open range [5, 8), i.e.
// 5, 6 and 7. Output stride is fixed at 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(8)
      .block_width(width)
      .block_height(8)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
  }
}
7830 
// block_height 8, block_width 4, with input stride 4 and output stride 16
// (larger than block_height).
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(16)
    .block_width(4)
    .block_height(8)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
}
7842 
// block_height swept over the half-open range [5, 8), i.e. 5, 6 and 7.
// NOTE: despite "bw_4" in the test name, block_width is fixed at 7 and the
// input stride at 21.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(21)
      .output_stride(height)
      .block_width(7)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
  }
}
7856 
// block_width 8 with block_height swept over the half-open range [5, 8), i.e.
// 5, 6 and 7. Output stride tracks block_height.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(height)
      .block_width(8)
      .block_height(height)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
  }
}
7870 
// Sweeps block_height and block_width over the half-open range [5, 8), i.e.
// 5, 6 and 7, with each stride equal to the matching block dimension.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
    }
  }
}
7886 
// Single 4x4 block with input stride 8 (larger than block_width) and output
// stride 4.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(4)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
}
7898 
// Single 4x4 block with input stride 4 and output stride 8 (larger than
// block_height).
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
}
7910 
// Single 4x4 block with both input and output stride set to 8.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
}
7922 
// block_height 68, block_width 76, with input_element_stride 15 (greater than
// element_size 4); the output element stride is left at the tester default.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(76)
    .output_stride(68)
    .block_width(76)
    .block_height(68)
    .element_size(4)
    .input_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
}
7935 
// block_height 12, block_width 20, with output_element_stride 15 (greater
// than element_size 4); the input element stride is left at the tester
// default.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(20)
    .output_stride(12)
    .block_width(20)
    .block_height(12)
    .element_size(4)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
}
7948 
// Block of height 28 and width 92 with input_stride 97, output_stride 34,
// input_element_stride 21 and output_element_stride 17, run against the
// 4x4 multi_multi_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(97)
    .output_stride(34)
    .block_width(92)
    .block_height(28)
    .element_size(4)
    .input_element_stride(21)
    .output_element_stride(17)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon);
}
7962 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
7963 
7964 
7965 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// One 4x4 block with input_stride and output_stride both set to 8, run
// against the 4x4 multi_switch_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
}
7977 
// Sweeps every block height and width from 1 to 8 inclusive, with
// input_stride = 3 * width and output_stride = 7 * height, against the
// 4x4 multi_switch_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 1; bh <= 8; ++bh) {
    for (size_t bw = 1; bw <= 8; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw * 3)
        .output_stride(bh * 7)
        .block_width(bw)
        .block_height(bh)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
    }
  }
}
7993 
// Block of height 4 and width 8 with input_stride 8 and output_stride 4,
// run against the 4x4 multi_switch_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(4)
    .block_width(8)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
}
8005 
// Height 4 with block widths 5, 6 and 7 (the upper bound 8 is exclusive);
// input_stride equals the width and output_stride is fixed at 8. Runs
// against the 4x4 multi_switch_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(8)
      .block_width(bw)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
  }
}
8019 
// Height 8 with block widths 5, 6 and 7 (the upper bound 8 is exclusive);
// input_stride equals the width and output_stride is fixed at 8. Runs
// against the 4x4 multi_switch_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(8)
      .block_width(bw)
      .block_height(8)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
  }
}
8033 
// Block of height 8 and width 4 with input_stride 4 and output_stride 16,
// run against the 4x4 multi_switch_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(16)
    .block_width(4)
    .block_height(8)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
}
8045 
// Block heights 5, 6 and 7 (the upper bound 8 is exclusive); output_stride
// equals the height. Runs against the 4x4 multi_switch_zip NEON kernel.
// NOTE(review): the test name says bw_4, but block_width is 7 and
// input_stride is 21 - confirm this is what the generator intends.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh < 8; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(21)
      .output_stride(bh)
      .block_width(7)
      .block_height(bh)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
  }
}
8059 
// Block heights 5, 6 and 7 (the upper bound 8 is exclusive) at width 8;
// input_stride is 8 and output_stride equals the height. Runs against the
// 4x4 multi_switch_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh < 8; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(bh)
      .block_width(8)
      .block_height(bh)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
  }
}
8073 
// Every height/width pair drawn from 5, 6, 7 (upper bound 8 is exclusive),
// with input_stride equal to the width and output_stride equal to the
// height. Runs against the 4x4 multi_switch_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh < 8; ++bh) {
    for (size_t bw = 5; bw < 8; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
    }
  }
}
8089 
// 4x4 block with input_stride 8 and output_stride 4, run against the
// 4x4 multi_switch_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(4)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
}
8101 
// 4x4 block with input_stride 4 and output_stride 8, run against the
// 4x4 multi_switch_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
}
8113 
// 4x4 block with input_stride and output_stride both set to 8, run against
// the 4x4 multi_switch_zip NEON kernel.
// NOTE(review): these are the same parameters as this suite's bh_4_bw_4 test.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
}
8125 
// Block of height 68 and width 76 with input_element_stride 15, run against
// the 4x4 multi_switch_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(76)
    .output_stride(68)
    .block_width(76)
    .block_height(68)
    .element_size(4)
    .input_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
}
8138 
// Block of height 12 and width 20 with output_element_stride 15, run against
// the 4x4 multi_switch_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(20)
    .output_stride(12)
    .block_width(20)
    .block_height(12)
    .element_size(4)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
}
8151 
// Block of height 28 and width 92 with input_stride 97, output_stride 34,
// input_element_stride 21 and output_element_stride 17, run against the
// 4x4 multi_switch_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(97)
    .output_stride(34)
    .block_width(92)
    .block_height(28)
    .element_size(4)
    .input_element_stride(21)
    .output_element_stride(17)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon);
}
8165 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
8166 
8167 
8168 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// One 4x4 block with input_stride and output_stride both set to 8, run
// against the 4x4 reuse_dec_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
8180 
// Sweeps every block height and width from 1 to 8 inclusive, with
// input_stride = 3 * width and output_stride = 7 * height, against the
// 4x4 reuse_dec_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 1; bh <= 8; ++bh) {
    for (size_t bw = 1; bw <= 8; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw * 3)
        .output_stride(bh * 7)
        .block_width(bw)
        .block_height(bh)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
    }
  }
}
8196 
// Block of height 4 and width 8 with input_stride 8 and output_stride 4,
// run against the 4x4 reuse_dec_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(4)
    .block_width(8)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
8208 
// Height 4 with block widths 5, 6 and 7 (the upper bound 8 is exclusive);
// input_stride equals the width and output_stride is fixed at 8. Runs
// against the 4x4 reuse_dec_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(8)
      .block_width(bw)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
  }
}
8222 
// Height 8 with block widths 5, 6 and 7 (the upper bound 8 is exclusive);
// input_stride equals the width and output_stride is fixed at 8. Runs
// against the 4x4 reuse_dec_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(8)
      .block_width(bw)
      .block_height(8)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
  }
}
8236 
// Block of height 8 and width 4 with input_stride 4 and output_stride 16,
// run against the 4x4 reuse_dec_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(16)
    .block_width(4)
    .block_height(8)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
8248 
// Block heights 5, 6 and 7 (the upper bound 8 is exclusive); output_stride
// equals the height. Runs against the 4x4 reuse_dec_zip NEON kernel.
// NOTE(review): the test name says bw_4, but block_width is 7 and
// input_stride is 21 - confirm this is what the generator intends.
TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh < 8; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(21)
      .output_stride(bh)
      .block_width(7)
      .block_height(bh)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
  }
}
8262 
// Block heights 5, 6 and 7 (the upper bound 8 is exclusive) at width 8;
// input_stride is 8 and output_stride equals the height. Runs against the
// 4x4 reuse_dec_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh < 8; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(bh)
      .block_width(8)
      .block_height(bh)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
  }
}
8276 
// Every height/width pair drawn from 5, 6, 7 (upper bound 8 is exclusive),
// with input_stride equal to the width and output_stride equal to the
// height. Runs against the 4x4 reuse_dec_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh < 8; ++bh) {
    for (size_t bw = 5; bw < 8; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
    }
  }
}
8292 
// 4x4 block with input_stride 8 and output_stride 4, run against the
// 4x4 reuse_dec_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(4)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
8304 
// 4x4 block with input_stride 4 and output_stride 8, run against the
// 4x4 reuse_dec_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
8316 
// 4x4 block with input_stride and output_stride both set to 8, run against
// the 4x4 reuse_dec_zip NEON kernel.
// NOTE(review): these are the same parameters as this suite's bh_4_bw_4 test.
TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
8328 
// Block of height 68 and width 76 with input_element_stride 15, run against
// the 4x4 reuse_dec_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(76)
    .output_stride(68)
    .block_width(76)
    .block_height(68)
    .element_size(4)
    .input_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
8341 
// Block of height 12 and width 20 with output_element_stride 15, run against
// the 4x4 reuse_dec_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(20)
    .output_stride(12)
    .block_width(20)
    .block_height(12)
    .element_size(4)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
8354 
// Block of height 28 and width 92 with input_stride 97, output_stride 34,
// input_element_stride 21 and output_element_stride 17, run against the
// 4x4 reuse_dec_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(97)
    .output_stride(34)
    .block_width(92)
    .block_height(28)
    .element_size(4)
    .input_element_stride(21)
    .output_element_stride(17)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
8368 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
8369 
8370 
8371 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// One 4x4 block with input_stride and output_stride both set to 8, run
// against the 4x4 reuse_mov_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
}
8383 
// Sweeps every block height and width from 1 to 8 inclusive, with
// input_stride = 3 * width and output_stride = 7 * height, against the
// 4x4 reuse_mov_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 1; bh <= 8; ++bh) {
    for (size_t bw = 1; bw <= 8; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw * 3)
        .output_stride(bh * 7)
        .block_width(bw)
        .block_height(bh)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
    }
  }
}
8399 
// Block of height 4 and width 8 with input_stride 8 and output_stride 4,
// run against the 4x4 reuse_mov_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(4)
    .block_width(8)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
}
8411 
// Height 4 with block widths 5, 6 and 7 (the upper bound 8 is exclusive);
// input_stride equals the width and output_stride is fixed at 8. Runs
// against the 4x4 reuse_mov_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(8)
      .block_width(bw)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
  }
}
8425 
// Height 8 with block widths 5, 6 and 7 (the upper bound 8 is exclusive);
// input_stride equals the width and output_stride is fixed at 8. Runs
// against the 4x4 reuse_mov_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(8)
      .block_width(bw)
      .block_height(8)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
  }
}
8439 
// Block of height 8 and width 4 with input_stride 4 and output_stride 16,
// run against the 4x4 reuse_mov_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(16)
    .block_width(4)
    .block_height(8)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
}
8451 
// Block heights 5, 6 and 7 (the upper bound 8 is exclusive); output_stride
// equals the height. Runs against the 4x4 reuse_mov_zip NEON kernel.
// NOTE(review): the test name says bw_4, but block_width is 7 and
// input_stride is 21 - confirm this is what the generator intends.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh < 8; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(21)
      .output_stride(bh)
      .block_width(7)
      .block_height(bh)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
  }
}
8465 
// Block heights 5, 6 and 7 (the upper bound 8 is exclusive) at width 8;
// input_stride is 8 and output_stride equals the height. Runs against the
// 4x4 reuse_mov_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh < 8; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(bh)
      .block_width(8)
      .block_height(bh)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
  }
}
8479 
// Every height/width pair drawn from 5, 6, 7 (upper bound 8 is exclusive),
// with input_stride equal to the width and output_stride equal to the
// height. Runs against the 4x4 reuse_mov_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh < 8; ++bh) {
    for (size_t bw = 5; bw < 8; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
    }
  }
}
8495 
// 4x4 block with input_stride 8 and output_stride 4, run against the
// 4x4 reuse_mov_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(4)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
}
8507 
// 4x4 block with input_stride 4 and output_stride 8, run against the
// 4x4 reuse_mov_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
}
8519 
// 4x4 block with input_stride and output_stride both set to 8, run against
// the 4x4 reuse_mov_zip NEON kernel.
// NOTE(review): these are the same parameters as this suite's bh_4_bw_4 test.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
}
8531 
// Block of height 68 and width 76 with input_element_stride 15, run against
// the 4x4 reuse_mov_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(76)
    .output_stride(68)
    .block_width(76)
    .block_height(68)
    .element_size(4)
    .input_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
}
8544 
// Block of height 12 and width 20 with output_element_stride 15, run against
// the 4x4 reuse_mov_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(20)
    .output_stride(12)
    .block_width(20)
    .block_height(12)
    .element_size(4)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
}
8557 
// Block of height 28 and width 92 with input_stride 97, output_stride 34,
// input_element_stride 21 and output_element_stride 17, run against the
// 4x4 reuse_mov_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(97)
    .output_stride(34)
    .block_width(92)
    .block_height(28)
    .element_size(4)
    .input_element_stride(21)
    .output_element_stride(17)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon);
}
8571 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
8572 
8573 
8574 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// One 4x4 block with input_stride and output_stride both set to 8, run
// against the 4x4 reuse_multi_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
}
8586 
// Sweeps every block height and width from 1 to 8 inclusive, with
// input_stride = 3 * width and output_stride = 7 * height, against the
// 4x4 reuse_multi_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 1; bh <= 8; ++bh) {
    for (size_t bw = 1; bw <= 8; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw * 3)
        .output_stride(bh * 7)
        .block_width(bw)
        .block_height(bh)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
    }
  }
}
8602 
// Block of height 4 and width 8 with input_stride 8 and output_stride 4,
// run against the 4x4 reuse_multi_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(4)
    .block_width(8)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
}
8614 
// Height 4 with block widths 5, 6 and 7 (the upper bound 8 is exclusive);
// input_stride equals the width and output_stride is fixed at 8. Runs
// against the 4x4 reuse_multi_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(8)
      .block_width(bw)
      .block_height(4)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
  }
}
8628 
// Height 8 with block widths 5, 6 and 7 (the upper bound 8 is exclusive);
// input_stride equals the width and output_stride is fixed at 8. Runs
// against the 4x4 reuse_multi_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(8)
      .block_width(bw)
      .block_height(8)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
  }
}
8642 
// Block of height 8 and width 4 with input_stride 4 and output_stride 16,
// run against the 4x4 reuse_multi_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(16)
    .block_width(4)
    .block_height(8)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
}
8654 
// Block heights 5, 6 and 7 (the upper bound 8 is exclusive); output_stride
// equals the height. Runs against the 4x4 reuse_multi_zip NEON kernel.
// NOTE(review): the test name says bw_4, but block_width is 7 and
// input_stride is 21 - confirm this is what the generator intends.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh < 8; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(21)
      .output_stride(bh)
      .block_width(7)
      .block_height(bh)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
  }
}
8668 
// Block heights 5, 6 and 7 (the upper bound 8 is exclusive) at width 8;
// input_stride is 8 and output_stride equals the height. Runs against the
// 4x4 reuse_multi_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh < 8; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(bh)
      .block_width(8)
      .block_height(bh)
      .element_size(4)
      .iterations(1)
      .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
  }
}
8682 
// Every height/width pair drawn from 5, 6, 7 (upper bound 8 is exclusive),
// with input_stride equal to the width and output_stride equal to the
// height. Runs against the 4x4 reuse_multi_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh < 8; ++bh) {
    for (size_t bw = 5; bw < 8; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
    }
  }
}
8698 
// 4x4 block with input_stride 8 and output_stride 4, run against the
// 4x4 reuse_multi_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(4)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
}
8710 
// 4x4 block with input_stride 4 and output_stride 8, run against the
// 4x4 reuse_multi_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
}
8722 
// 4x4 block with input_stride and output_stride both set to 8, run against
// the 4x4 reuse_multi_zip NEON kernel.
// NOTE(review): these are the same parameters as this suite's bh_4_bw_4 test.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
}
8734 
// Block of height 68 and width 76 with input_element_stride 15, run against
// the 4x4 reuse_multi_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(76)
    .output_stride(68)
    .block_width(76)
    .block_height(68)
    .element_size(4)
    .input_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
}
8747 
// Block of height 12 and width 20 with output_element_stride 15, run against
// the 4x4 reuse_multi_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(20)
    .output_stride(12)
    .block_width(20)
    .block_height(12)
    .element_size(4)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
}
8760 
// Block of height 28 and width 92 with input_stride 97, output_stride 34,
// input_element_stride 21 and output_element_stride 17, run against the
// 4x4 reuse_multi_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(97)
    .output_stride(34)
    .block_width(92)
    .block_height(28)
    .element_size(4)
    .input_element_stride(21)
    .output_element_stride(17)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon);
}
8774 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
8775 
8776 
8777 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// One 4x4 block with input_stride and output_stride both set to 8, run
// against the 4x4 reuse_switch_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(8)
    .block_width(4)
    .block_height(4)
    .element_size(4)
    .iterations(1)
    .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
}
8789 
// Sweeps every block height and width from 1 to 8 inclusive, with
// input_stride = 3 * width and output_stride = 7 * height, against the
// 4x4 reuse_switch_zip NEON kernel.
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 1; bh <= 8; ++bh) {
    for (size_t bw = 1; bw <= 8; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw * 3)
        .output_stride(bh * 7)
        .block_width(bw)
        .block_height(bh)
        .element_size(4)
        .iterations(1)
        .Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
    }
  }
}
8805 
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  // Block of width 8 and height 4; strides match the block extents exactly
  // (input stride 8, output stride 4).
  TransposeMicrokernelTester tester{};
  tester.input_stride(8)
      .output_stride(4)
      .block_width(8)
      .block_height(4)
      .element_size(4)
      .iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
}
8817 
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  // Height fixed at 4; width sweeps 5, 6, 7 (8 itself is excluded by the
  // loop bound). The input stride follows the width, the output stride is 8.
  for (size_t width = 5; width <= 7; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(4)
        .element_size(4)
        .iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
  }
}
8831 
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  // Height fixed at 8; width sweeps 5, 6, 7 (8 itself is excluded by the
  // loop bound). The input stride follows the width, the output stride is 8.
  for (size_t width = 5; width <= 7; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(8)
        .element_size(4)
        .iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
  }
}
8845 
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  // Block of width 4 and height 8; input stride equals the width (4) while
  // the output stride (16) is twice the block height.
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(16)
      .block_width(4)
      .block_height(8)
      .element_size(4)
      .iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
}
8857 
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  // Height sweeps 5, 6, 7 (8 itself is excluded by the loop bound); the
  // output stride follows the height.
  // NOTE(review): the case name says bw_4, yet block_width is 7 and
  // input_stride is 21 - confirm this is what the generator intends.
  for (size_t height = 5; height <= 7; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(21)
        .output_stride(height)
        .block_width(7)
        .block_height(height)
        .element_size(4)
        .iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
  }
}
8871 
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  // Width fixed at 8 (input stride 8); height sweeps 5, 6, 7 (8 itself is
  // excluded by the loop bound) and the output stride follows the height.
  for (size_t height = 5; height <= 7; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(8)
        .output_stride(height)
        .block_width(8)
        .block_height(height)
        .element_size(4)
        .iterations(1);
    tester.Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
  }
}
8885 
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps every (height, width) pair with both values in {5, 6, 7}; the
  // strides are equal to the swept extents (input = width, output = height).
  for (size_t height = 5; height <= 7; ++height) {
    for (size_t width = 5; width <= 7; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(4)
          .iterations(1);
      tester.Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
    }
  }
}
8901 
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  // 4x4 block where only the input stride (8) exceeds the block extent;
  // the output stride is 4.
  TransposeMicrokernelTester tester{};
  tester.input_stride(8)
      .output_stride(4)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
}
8913 
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  // 4x4 block where only the output stride (8) exceeds the block extent;
  // the input stride is 4.
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
}
8925 
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  // 4x4 block with both the input and the output stride set to 8.
  TransposeMicrokernelTester tester{};
  tester.input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(4)
      .iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
}
8937 
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_68_bw_76_ies_15) {
  TEST_REQUIRES_ARM_NEON;
  // Block of width 76 and height 68 whose input/output strides equal the
  // block width/height, with input_element_stride additionally set to 15.
  TransposeMicrokernelTester tester{};
  tester.input_stride(76)
      .output_stride(68)
      .block_width(76)
      .block_height(68)
      .element_size(4)
      .input_element_stride(15)
      .iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
}
8950 
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_12_bw_20_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  // Block of width 20 and height 12 whose input/output strides equal the
  // block width/height, with output_element_stride additionally set to 15.
  TransposeMicrokernelTester tester{};
  tester.input_stride(20)
      .output_stride(12)
      .block_width(20)
      .block_height(12)
      .element_size(4)
      .output_element_stride(15)
      .iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
}
8963 
TEST(X32_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_4, bh_28_bw_92_ies_21_oes_17) {
  TEST_REQUIRES_ARM_NEON;
  // Block of width 92 and height 28 with strides (97 / 34) larger than the
  // block extents, and both element strides overridden (input 21, output 17).
  TransposeMicrokernelTester tester{};
  tester.input_stride(97)
      .output_stride(34)
      .block_width(92)
      .block_height(28)
      .element_size(4)
      .input_element_stride(21)
      .output_element_stride(17)
      .iterations(1);
  tester.Test(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon);
}
8977 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
8978