xref: /aosp_15_r20/external/XNNPACK/test/x16-transpose.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright 2021 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 //
6 // Auto-generated file. Do not edit!
7 //   Specification: test/x16-transpose.yaml
8 //   Generator: tools/generate-transpose-test.py
9 
10 
11 #include <gtest/gtest.h>
12 
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15 
16 #include <xnnpack/transpose.h>
17 #include "transpose-microkernel-tester.h"
18 
19 
// Exercises the 1x2 scalar-int kernel with block width 2, block height 1,
// input stride 4 and output stride 2.
TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_1_bw_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(2)
      .block_width(2)
      .block_height(1)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
}
30 
// Sweeps block heights 1..2 and block widths 1..4 for the 1x2 scalar-int
// kernel; input stride is 3 * width and output stride is 7 * height.
TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_1_2_bw_1_4) {
  for (size_t height = 1; height <= 2; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
    }
  }
}
45 
// Exercises the 1x2 scalar-int kernel with block width 4, block height 1,
// input stride 4 and output stride 1.
TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_1_bw_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(1)
      .block_width(4)
      .block_height(1)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
}
56 
// Sweeps block widths in [3, 4) at block height 1 for the 1x2 scalar-int
// kernel; input stride equals the block width, output stride is 2.
TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_1_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(2)
        .block_width(width)
        .block_height(1)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
  }
}
69 
// Sweeps block widths in [3, 4) at block height 2 for the 1x2 scalar-int
// kernel; input stride equals the block width, output stride is 2.
TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_2_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(2)
        .block_width(width)
        .block_height(2)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
  }
}
82 
// Exercises the 1x2 scalar-int kernel with block width 2, block height 2,
// input stride 2 and output stride 7.
TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_2_bw_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(7)
      .block_width(2)
      .block_height(2)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
}
93 
// Sweeps block heights in [2, 2) with block width 5 and input stride 19 for
// the 1x2 scalar-int kernel; output stride equals the block height.
// NOTE(review): the range [2, 2) is empty, so the loop body never runs and no
// kernel call is made here -- confirm this is intended by the generator.
TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_2_2_bw_2) {
  for (size_t height = 2; height < 2; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(19)
        .output_stride(height)
        .block_width(5)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
  }
}
106 
// Sweeps block heights in [2, 2) with block width 4 and input stride 4 for
// the 1x2 scalar-int kernel; output stride equals the block height.
// NOTE(review): the range [2, 2) is empty, so the loop body never runs and no
// kernel call is made here -- confirm this is intended by the generator.
TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_2_2_bw_4) {
  for (size_t height = 2; height < 2; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(4)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
  }
}
119 
// Sweeps block heights in [2, 2) and block widths in [3, 4) for the 1x2
// scalar-int kernel; strides equal the corresponding block dimensions.
// NOTE(review): the outer range [2, 2) is empty, so the body never runs and
// no kernel call is made here -- confirm this is intended by the generator.
TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_2_2_bw_3_4) {
  for (size_t height = 2; height < 2; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
    }
  }
}
134 
// Exercises the 1x2 scalar-int kernel with block width 2, block height 1,
// input stride 4 and output stride 1.
TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_1_bw_2_is_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(1)
      .block_width(2)
      .block_height(1)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
}
145 
// Exercises the 1x2 scalar-int kernel with block width 2, block height 1,
// input stride 2 and output stride 2.
TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_1_bw_2_os_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(2)
      .block_width(2)
      .block_height(1)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
}
156 
// Exercises the 1x2 scalar-int kernel with block width 2, block height 1,
// input stride 4 and output stride 2.
TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_1_bw_2_is_4_os_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(2)
      .block_width(2)
      .block_height(1)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
}
167 
// Exercises the 1x2 scalar-int kernel on a 38-wide, 17-high block with an
// input element stride of 13 (input stride 38, output stride 17).
TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_17_bw_38_ies_13) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(38)
      .output_stride(17)
      .block_width(38)
      .block_height(17)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
}
179 
// Exercises the 1x2 scalar-int kernel on a 10-wide, 3-high block with an
// output element stride of 13 (input stride 10, output stride 3).
TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_3_bw_10_oes_13) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(10)
      .output_stride(3)
      .block_width(10)
      .block_height(3)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
}
191 
// Exercises the 1x2 scalar-int kernel on a 46-wide, 7-high block with input
// element stride 19 and output element stride 15 (input stride 51, output
// stride 13).
TEST(X16_TRANSPOSEC__1X2_SCALAR_INT_2, bh_7_bw_46_ies_19_oes_15) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(51)
      .output_stride(13)
      .block_width(46)
      .block_height(7)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x2_scalar_int);
}
204 
// Exercises the 1x4 scalar-int kernel with block width 4, block height 1,
// input stride 8 and output stride 2.
TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_1_bw_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(8)
      .output_stride(2)
      .block_width(4)
      .block_height(1)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
}
215 
// Sweeps block heights 1..2 and block widths 1..8 for the 1x4 scalar-int
// kernel; input stride is 3 * width and output stride is 7 * height.
TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_1_2_bw_1_8) {
  for (size_t height = 1; height <= 2; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
    }
  }
}
230 
// Exercises the 1x4 scalar-int kernel with block width 8, block height 1,
// input stride 8 and output stride 1.
TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_1_bw_8) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(8)
      .output_stride(1)
      .block_width(8)
      .block_height(1)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
}
241 
// Sweeps block widths in [5, 8) at block height 1 for the 1x4 scalar-int
// kernel; input stride equals the block width, output stride is 2.
TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_1_bw_5_8) {
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(2)
        .block_width(width)
        .block_height(1)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
  }
}
254 
// Sweeps block widths in [5, 8) at block height 2 for the 1x4 scalar-int
// kernel; input stride equals the block width, output stride is 2.
TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_2_bw_5_8) {
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(2)
        .block_width(width)
        .block_height(2)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
  }
}
267 
// Exercises the 1x4 scalar-int kernel with block width 4, block height 2,
// input stride 4 and output stride 7.
TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_2_bw_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(7)
      .block_width(4)
      .block_height(2)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
}
278 
// Sweeps block heights in [2, 2) with block width 7 and input stride 21 for
// the 1x4 scalar-int kernel; output stride equals the block height.
// NOTE(review): the range [2, 2) is empty, so the loop body never runs and no
// kernel call is made here -- confirm this is intended by the generator.
TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_2_2_bw_4) {
  for (size_t height = 2; height < 2; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(21)
        .output_stride(height)
        .block_width(7)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
  }
}
291 
// Sweeps block heights in [2, 2) with block width 8 and input stride 8 for
// the 1x4 scalar-int kernel; output stride equals the block height.
// NOTE(review): the range [2, 2) is empty, so the loop body never runs and no
// kernel call is made here -- confirm this is intended by the generator.
TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_2_2_bw_8) {
  for (size_t height = 2; height < 2; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(8)
        .output_stride(height)
        .block_width(8)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
  }
}
304 
// Sweeps block heights in [2, 2) and block widths in [5, 8) for the 1x4
// scalar-int kernel; strides equal the corresponding block dimensions.
// NOTE(review): the outer range [2, 2) is empty, so the body never runs and
// no kernel call is made here -- confirm this is intended by the generator.
TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_2_2_bw_5_8) {
  for (size_t height = 2; height < 2; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
    }
  }
}
319 
// Exercises the 1x4 scalar-int kernel with block width 4, block height 1,
// input stride 8 and output stride 1.
TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_1_bw_4_is_8) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(8)
      .output_stride(1)
      .block_width(4)
      .block_height(1)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
}
330 
// Exercises the 1x4 scalar-int kernel with block width 4, block height 1,
// input stride 4 and output stride 2.
TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_1_bw_4_os_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(2)
      .block_width(4)
      .block_height(1)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
}
341 
// Exercises the 1x4 scalar-int kernel with block width 4, block height 1,
// input stride 8 and output stride 2.
TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_1_bw_4_is_8_os_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(8)
      .output_stride(2)
      .block_width(4)
      .block_height(1)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
}
352 
// Exercises the 1x4 scalar-int kernel on a 76-wide, 17-high block with an
// input element stride of 13 (input stride 76, output stride 17).
TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_17_bw_76_ies_13) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(76)
      .output_stride(17)
      .block_width(76)
      .block_height(17)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
}
364 
// Exercises the 1x4 scalar-int kernel on a 20-wide, 3-high block with an
// output element stride of 13 (input stride 20, output stride 3).
TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_3_bw_20_oes_13) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(20)
      .output_stride(3)
      .block_width(20)
      .block_height(3)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
}
376 
// Exercises the 1x4 scalar-int kernel on a 92-wide, 7-high block with input
// element stride 19 and output element stride 15 (input stride 97, output
// stride 13).
TEST(X16_TRANSPOSEC__1X4_SCALAR_INT_2, bh_7_bw_92_ies_19_oes_15) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(97)
      .output_stride(13)
      .block_width(92)
      .block_height(7)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__1x4_scalar_int);
}
389 
// Exercises the 2x1 scalar-int kernel with block width 1, block height 2,
// input stride 2 and output stride 4.
TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_2_bw_1) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(4)
      .block_width(1)
      .block_height(2)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
}
400 
// Sweeps block heights 1..4 and block widths 1..2 for the 2x1 scalar-int
// kernel; input stride is 3 * width and output stride is 7 * height.
TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_1_4_bw_1_2) {
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 2; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
    }
  }
}
415 
// Exercises the 2x1 scalar-int kernel with block width 2, block height 2,
// input stride 2 and output stride 2.
TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_2_bw_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(2)
      .block_width(2)
      .block_height(2)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
}
426 
// Sweeps block widths in [2, 2) at block height 2 for the 2x1 scalar-int
// kernel; input stride equals the block width, output stride is 4.
// NOTE(review): the range [2, 2) is empty, so the loop body never runs and no
// kernel call is made here -- confirm this is intended by the generator.
TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_2_bw_2_2) {
  for (size_t width = 2; width < 2; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(2)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
  }
}
439 
// Sweeps block widths in [2, 2) at block height 4 for the 2x1 scalar-int
// kernel; input stride equals the block width, output stride is 4.
// NOTE(review): the range [2, 2) is empty, so the loop body never runs and no
// kernel call is made here -- confirm this is intended by the generator.
TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_4_bw_2_2) {
  for (size_t width = 2; width < 2; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(4)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
  }
}
452 
// Exercises the 2x1 scalar-int kernel with block width 1, block height 4,
// input stride 1 and output stride 10.
TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_4_bw_1) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(1)
      .output_stride(10)
      .block_width(1)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
}
463 
// Sweeps block heights in [3, 4) with block width 4 and input stride 18 for
// the 2x1 scalar-int kernel; output stride equals the block height.
TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_3_4_bw_1) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(18)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
  }
}
476 
// Sweeps block heights in [3, 4) with block width 2 and input stride 2 for
// the 2x1 scalar-int kernel; output stride equals the block height.
TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_3_4_bw_2) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(2)
        .output_stride(height)
        .block_width(2)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
  }
}
489 
// Sweeps block heights in [3, 4) and block widths in [2, 2) for the 2x1
// scalar-int kernel; strides equal the corresponding block dimensions.
// NOTE(review): the inner range [2, 2) is empty, so the body never runs and
// no kernel call is made here -- confirm this is intended by the generator.
TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_3_4_bw_2_2) {
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 2; width < 2; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
    }
  }
}
504 
// Exercises the 2x1 scalar-int kernel with block width 1, block height 2,
// input stride 2 and output stride 2.
TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_2_bw_1_is_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(2)
      .block_width(1)
      .block_height(2)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
}
515 
// Exercises the 2x1 scalar-int kernel with block width 1, block height 2,
// input stride 1 and output stride 4.
TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_2_bw_1_os_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(1)
      .output_stride(4)
      .block_width(1)
      .block_height(2)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
}
526 
// Exercises the 2x1 scalar-int kernel with block width 1, block height 2,
// input stride 2 and output stride 4.
TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_2_bw_1_is_2_os_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(4)
      .block_width(1)
      .block_height(2)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
}
537 
// Exercises the 2x1 scalar-int kernel on a 19-wide, 34-high block with an
// input element stride of 13 (input stride 19, output stride 34).
TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_34_bw_19_ies_13) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(19)
      .output_stride(34)
      .block_width(19)
      .block_height(34)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
}
549 
// Exercises the 2x1 scalar-int kernel on a 5-wide, 6-high block with an
// output element stride of 13 (input stride 5, output stride 6).
TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_6_bw_5_oes_13) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(5)
      .output_stride(6)
      .block_width(5)
      .block_height(6)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
}
561 
// Exercises the 2x1 scalar-int kernel on a 23-wide, 14-high block with input
// element stride 19 and output element stride 15 (input stride 28, output
// stride 20).
TEST(X16_TRANSPOSEC__2X1_SCALAR_INT_2, bh_14_bw_23_ies_19_oes_15) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(28)
      .output_stride(20)
      .block_width(23)
      .block_height(14)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x1_scalar_int);
}
574 
// Exercises the 2x2 scalar-int kernel with block width 2, block height 2,
// input stride 4 and output stride 4.
TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_2_bw_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
}
585 
// Sweeps block heights 1..4 and block widths 1..4 for the 2x2 scalar-int
// kernel; input stride is 3 * width and output stride is 7 * height.
TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_1_4_bw_1_4) {
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
    }
  }
}
600 
// Exercises the 2x2 scalar-int kernel with block width 4, block height 2,
// input stride 4 and output stride 2.
TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_2_bw_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(2)
      .block_width(4)
      .block_height(2)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
}
611 
// Sweeps block widths in [3, 4) at block height 2 for the 2x2 scalar-int
// kernel; input stride equals the block width, output stride is 4.
TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_2_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(2)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
  }
}
624 
// Sweeps block widths in [3, 4) at block height 4 for the 2x2 scalar-int
// kernel; input stride equals the block width, output stride is 4.
TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_4_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(4)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
  }
}
637 
// Exercises the 2x2 scalar-int kernel with block width 2, block height 4,
// input stride 2 and output stride 10.
TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_4_bw_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(10)
      .block_width(2)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
}
648 
// Sweeps block heights in [3, 4) with block width 5 and input stride 19 for
// the 2x2 scalar-int kernel; output stride equals the block height.
TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_3_4_bw_2) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(19)
        .output_stride(height)
        .block_width(5)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
  }
}
661 
// Sweeps block heights in [3, 4) with block width 4 and input stride 4 for
// the 2x2 scalar-int kernel; output stride equals the block height.
TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_3_4_bw_4) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(4)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
  }
}
674 
// Sweeps block heights in [3, 4) and block widths in [3, 4) for the 2x2
// scalar-int kernel; strides equal the corresponding block dimensions.
TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_3_4_bw_3_4) {
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
    }
  }
}
689 
// Exercises the 2x2 scalar-int kernel with block width 2, block height 2,
// input stride 4 and output stride 2.
TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_2_bw_2_is_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(2)
      .block_width(2)
      .block_height(2)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
}
700 
// Exercises the 2x2 scalar-int kernel with block width 2, block height 2,
// input stride 2 and output stride 4.
TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_2_bw_2_os_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
}
711 
// Exercises the 2x2 scalar-int kernel with block width 2, block height 2,
// input stride 4 and output stride 4.
TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_2_bw_2_is_4_os_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
}
722 
// Exercises the 2x2 scalar-int kernel on a 38-wide, 34-high block with an
// input element stride of 13 (input stride 38, output stride 34).
TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_34_bw_38_ies_13) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(38)
      .output_stride(34)
      .block_width(38)
      .block_height(34)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
}
734 
// Exercises the 2x2 scalar-int kernel on a 10-wide, 6-high block with an
// output element stride of 13 (input stride 10, output stride 6).
TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_6_bw_10_oes_13) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(10)
      .output_stride(6)
      .block_width(10)
      .block_height(6)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
}
746 
// Exercises the 2x2 scalar-int kernel on a 46-wide, 14-high block with input
// element stride 19 and output element stride 15 (input stride 51, output
// stride 20).
TEST(X16_TRANSPOSEC__2X2_SCALAR_INT_2, bh_14_bw_46_ies_19_oes_15) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(51)
      .output_stride(20)
      .block_width(46)
      .block_height(14)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x2_scalar_int);
}
759 
// Exercises the 2x4 scalar-int kernel with block width 4, block height 2,
// input stride 8 and output stride 4.
TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_2_bw_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(8)
      .output_stride(4)
      .block_width(4)
      .block_height(2)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
}
770 
// Sweeps block heights 1..4 and block widths 1..8 for the 2x4 scalar-int
// kernel; input stride is 3 * width and output stride is 7 * height.
TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_1_4_bw_1_8) {
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
    }
  }
}
785 
// Exercises the 2x4 scalar-int kernel with block width 8, block height 2,
// input stride 8 and output stride 2.
TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_2_bw_8) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(8)
      .output_stride(2)
      .block_width(8)
      .block_height(2)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
}
796 
// Sweeps block widths in [5, 8) at block height 2 for the 2x4 scalar-int
// kernel; input stride equals the block width, output stride is 4.
TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_2_bw_5_8) {
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(2)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
  }
}
809 
// Sweeps block widths in [5, 8) at block height 4 for the 2x4 scalar-int
// kernel; input stride equals the block width, output stride is 4.
TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_4_bw_5_8) {
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(4)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
  }
}
822 
// Exercises the 2x4 scalar-int kernel with block width 4, block height 4,
// input stride 4 and output stride 10.
TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_4_bw_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(10)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
}
833 
// Sweeps block heights in [3, 4) with block width 7 and input stride 21 for
// the 2x4 scalar-int kernel; output stride equals the block height.
TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_3_4_bw_4) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(21)
        .output_stride(height)
        .block_width(7)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
  }
}
846 
// Sweeps block heights in [3, 4) with block width 8 and input stride 8 for
// the 2x4 scalar-int kernel; output stride equals the block height.
TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_3_4_bw_8) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(8)
        .output_stride(height)
        .block_width(8)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
  }
}
859 
// Sweeps block heights in [3, 4) and block widths in [5, 8) for the 2x4
// scalar-int kernel; strides equal the corresponding block dimensions.
TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_3_4_bw_5_8) {
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
    }
  }
}
874 
// Exercises the 2x4 scalar-int kernel with block width 4, block height 2,
// input stride 8 and output stride 2.
TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_2_bw_4_is_8) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(8)
      .output_stride(2)
      .block_width(4)
      .block_height(2)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
}
885 
// Exercises the 2x4 scalar-int kernel with block width 4, block height 2,
// input stride 4 and output stride 4.
TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_2_bw_4_os_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(4)
      .block_width(4)
      .block_height(2)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
}
896 
// Exercises the 2x4 scalar-int kernel with block width 4, block height 2,
// input stride 8 and output stride 4.
TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_2_bw_4_is_8_os_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(8)
      .output_stride(4)
      .block_width(4)
      .block_height(2)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
}
907 
// Exercises the 2x4 scalar-int kernel on a 76-wide, 34-high block with an
// input element stride of 13 (input stride 76, output stride 34).
TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_34_bw_76_ies_13) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(76)
      .output_stride(34)
      .block_width(76)
      .block_height(34)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
}
919 
// Exercises the 2x4 scalar-int kernel on a 20-wide, 6-high block with an
// output element stride of 13 (input stride 20, output stride 6).
TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_6_bw_20_oes_13) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(20)
      .output_stride(6)
      .block_width(20)
      .block_height(6)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
}
931 
// Exercises the 2x4 scalar-int kernel on a 92-wide, 14-high block with input
// element stride 19 and output element stride 15 (input stride 97, output
// stride 20).
TEST(X16_TRANSPOSEC__2X4_SCALAR_INT_2, bh_14_bw_92_ies_19_oes_15) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(97)
      .output_stride(20)
      .block_width(92)
      .block_height(14)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__2x4_scalar_int);
}
944 
// Exercises the 4x1 scalar-int kernel with block width 1, block height 4,
// input stride 2 and output stride 8.
TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_4_bw_1) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(8)
      .block_width(1)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
}
955 
// Sweeps block heights 1..8 and block widths 1..2 for the 4x1 scalar-int
// kernel; input stride is 3 * width and output stride is 7 * height.
TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_1_8_bw_1_2) {
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 2; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
    }
  }
}
970 
// Single configuration: block_height 4, block_width 2, with strides equal to
// the block dimensions.
TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_4_bw_2) {
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(4)
      .block_width(2)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
}
981 
// Width sweep at block_height 4. The generated loop `i = 2; i < 2` had an
// empty range, so this test passed without ever invoking the kernel. The upper
// bound is inclusive here so that block_width 2 (the value named in the test)
// is actually exercised.
// NOTE(review): this file is generated; the same fix belongs in the generator
// template so it is not lost on regeneration.
TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_4_bw_2_2) {
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(4)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
  }
}
994 
// Width sweep at block_height 8. The generated loop `i = 2; i < 2` had an
// empty range, so this test passed without ever invoking the kernel. The upper
// bound is inclusive here so that block_width 2 (the value named in the test)
// is actually exercised.
// NOTE(review): this file is generated; the same fix belongs in the generator
// template so it is not lost on regeneration.
TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_8_bw_2_2) {
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
  }
}
1007 
// Single configuration: block_height 8, block_width 1; input_stride equals
// the width while output_stride (16) exceeds the height.
TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_8_bw_1) {
  TransposeMicrokernelTester()
      .input_stride(1)
      .output_stride(16)
      .block_width(1)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
}
1018 
// Sweeps block_height over 5, 6, 7 (upper bound 8 is exclusive) with
// output_stride equal to the height. Note that block_width is fixed at 4 and
// input_stride at 18, even though the test name says bw_1.
TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_5_8_bw_1) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(18)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
  }
}
1031 
// Sweeps block_height over 5, 6, 7 (upper bound 8 is exclusive) at a fixed
// block_width of 2; both strides equal the block dimensions.
TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_5_8_bw_2) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(2)
        .output_stride(height)
        .block_width(2)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
  }
}
1044 
// Sweeps block_height over 5, 6, 7 (upper bound 8 is exclusive) against a
// width range. The generated inner loop `j = 2; j < 2` had an empty range, so
// this test passed without ever invoking the kernel. The inner bound is
// inclusive here so that block_width 2 (the value named in the test) runs.
// NOTE(review): this file is generated; the same fix belongs in the generator
// template so it is not lost on regeneration.
TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_5_8_bw_2_2) {
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 2; width <= 2; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
    }
  }
}
1059 
// block_height 4, block_width 1 with input_stride (2) larger than the width;
// output_stride equals the height.
TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_4_bw_1_is_2) {
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(4)
      .block_width(1)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
}
1070 
// block_height 4, block_width 1 with output_stride (8) larger than the
// height; input_stride equals the width.
TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_4_bw_1_os_8) {
  TransposeMicrokernelTester()
      .input_stride(1)
      .output_stride(8)
      .block_width(1)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
}
1081 
// block_height 4, block_width 1 with both input_stride (2) and
// output_stride (8) larger than the block dimensions.
TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_4_bw_1_is_2_os_8) {
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(8)
      .block_width(1)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
}
1092 
// block_height 68, block_width 19 with strides equal to the block dimensions;
// additionally sets input_element_stride(13) alongside element_size(2).
TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_68_bw_19_ies_13) {
  TransposeMicrokernelTester()
      .input_stride(19)
      .output_stride(68)
      .block_width(19)
      .block_height(68)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
}
1104 
// block_height 12, block_width 5 with strides equal to the block dimensions;
// additionally sets output_element_stride(13) alongside element_size(2).
TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_12_bw_5_oes_13) {
  TransposeMicrokernelTester()
      .input_stride(5)
      .output_stride(12)
      .block_width(5)
      .block_height(12)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
}
1116 
// block_height 28, block_width 23 with both strides larger than the block
// dimensions, and both input (19) and output (15) element strides set.
TEST(X16_TRANSPOSEC__4X1_SCALAR_INT_2, bh_28_bw_23_ies_19_oes_15) {
  TransposeMicrokernelTester()
      .input_stride(28)
      .output_stride(34)
      .block_width(23)
      .block_height(28)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x1_scalar_int);
}
1129 
// Single configuration: block_height 4, block_width 2, with both strides
// larger than the corresponding block dimension.
TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_4_bw_2) {
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(8)
      .block_width(2)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
}
1140 
// Sweeps every block_height in [1, 8] against every block_width in [1, 4].
// Strides scale with the dimensions: input_stride = 3 * width,
// output_stride = 7 * height.
TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_1_8_bw_1_4) {
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
    }
  }
}
1155 
// Single configuration: block_height 4, block_width 4, with strides equal to
// the block dimensions.
TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_4_bw_4) {
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
}
1166 
// Width sweep at block_height 4. The upper bound 4 is exclusive, so only
// block_width 3 runs; input_stride tracks the width, output_stride is 8.
TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_4_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(4)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
  }
}
1179 
// Width sweep at block_height 8. The upper bound 4 is exclusive, so only
// block_width 3 runs; input_stride tracks the width, output_stride is 8.
TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_8_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
  }
}
1192 
// Single configuration: block_height 8, block_width 2; input_stride equals
// the width while output_stride (16) exceeds the height.
TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_8_bw_2) {
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(16)
      .block_width(2)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
}
1203 
// Sweeps block_height over 5, 6, 7 (upper bound 8 is exclusive) with
// output_stride equal to the height. Note that block_width is fixed at 5 and
// input_stride at 19, even though the test name says bw_2.
TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_5_8_bw_2) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(19)
        .output_stride(height)
        .block_width(5)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
  }
}
1216 
// Sweeps block_height over 5, 6, 7 (upper bound 8 is exclusive) at a fixed
// block_width of 4; both strides equal the block dimensions.
TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_5_8_bw_4) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(4)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
  }
}
1229 
// Sweeps block_height over 5, 6, 7 against a width range whose exclusive
// upper bound 4 leaves only block_width 3. Strides equal the dimensions.
TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_5_8_bw_3_4) {
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
    }
  }
}
1244 
// block_height 4, block_width 2 with input_stride (4) larger than the width;
// output_stride equals the height.
TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_4_bw_2_is_4) {
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
}
1255 
// block_height 4, block_width 2 with output_stride (8) larger than the
// height; input_stride equals the width.
TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_4_bw_2_os_8) {
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(8)
      .block_width(2)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
}
1266 
// block_height 4, block_width 2 with both input_stride (4) and
// output_stride (8) larger than the block dimensions.
TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_4_bw_2_is_4_os_8) {
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(8)
      .block_width(2)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
}
1277 
// block_height 68, block_width 38 with strides equal to the block dimensions;
// additionally sets input_element_stride(13) alongside element_size(2).
TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_68_bw_38_ies_13) {
  TransposeMicrokernelTester()
      .input_stride(38)
      .output_stride(68)
      .block_width(38)
      .block_height(68)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
}
1289 
// block_height 12, block_width 10 with strides equal to the block dimensions;
// additionally sets output_element_stride(13) alongside element_size(2).
TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_12_bw_10_oes_13) {
  TransposeMicrokernelTester()
      .input_stride(10)
      .output_stride(12)
      .block_width(10)
      .block_height(12)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
}
1301 
// block_height 28, block_width 46 with both strides larger than the block
// dimensions, and both input (19) and output (15) element strides set.
TEST(X16_TRANSPOSEC__4X2_SCALAR_INT_2, bh_28_bw_46_ies_19_oes_15) {
  TransposeMicrokernelTester()
      .input_stride(51)
      .output_stride(34)
      .block_width(46)
      .block_height(28)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x2_scalar_int);
}
1314 
// Single configuration: block_height 4, block_width 4, with both strides (8)
// larger than the block dimensions.
TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4) {
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
}
1325 
// Sweeps every block_height in [1, 8] against every block_width in [1, 8].
// Strides scale with the dimensions: input_stride = 3 * width,
// output_stride = 7 * height.
TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_1_8_bw_1_8) {
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
    }
  }
}
1340 
// Single configuration: block_height 4, block_width 8, with strides equal to
// the block dimensions.
TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_8) {
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(8)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
}
1351 
// Width sweep at block_height 4 over block_width 5, 6, 7 (upper bound 8 is
// exclusive); input_stride tracks the width, output_stride is 8.
TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_5_8) {
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(4)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
  }
}
1364 
// Width sweep at block_height 8 over block_width 5, 6, 7 (upper bound 8 is
// exclusive); input_stride tracks the width, output_stride is 8.
TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_8_bw_5_8) {
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
  }
}
1377 
// Single configuration: block_height 8, block_width 4; input_stride equals
// the width while output_stride (16) exceeds the height.
TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_8_bw_4) {
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(16)
      .block_width(4)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
}
1388 
// Sweeps block_height over 5, 6, 7 (upper bound 8 is exclusive) with
// output_stride equal to the height. Note that block_width is fixed at 7 and
// input_stride at 21, even though the test name says bw_4.
TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_5_8_bw_4) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(21)
        .output_stride(height)
        .block_width(7)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
  }
}
1401 
// Sweeps block_height over 5, 6, 7 (upper bound 8 is exclusive) at a fixed
// block_width of 8; both strides equal the block dimensions.
TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_5_8_bw_8) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(8)
        .output_stride(height)
        .block_width(8)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
  }
}
1414 
// Sweeps block_height and block_width independently over 5, 6, 7 (both upper
// bounds of 8 are exclusive); strides equal the block dimensions.
TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_5_8_bw_5_8) {
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
    }
  }
}
1429 
// block_height 4, block_width 4 with input_stride (8) larger than the width;
// output_stride equals the height.
TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_is_8) {
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
}
1440 
// block_height 4, block_width 4 with output_stride (8) larger than the
// height; input_stride equals the width.
TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_os_8) {
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
}
1451 
// block_height 4, block_width 4 with both input_stride and output_stride (8)
// larger than the block dimensions.
TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_4_bw_4_is_8_os_8) {
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
}
1462 
// block_height 68, block_width 76 with strides equal to the block dimensions;
// additionally sets input_element_stride(13) alongside element_size(2).
TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_68_bw_76_ies_13) {
  TransposeMicrokernelTester()
      .input_stride(76)
      .output_stride(68)
      .block_width(76)
      .block_height(68)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
}
1474 
// block_height 12, block_width 20 with strides equal to the block dimensions;
// additionally sets output_element_stride(13) alongside element_size(2).
TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_12_bw_20_oes_13) {
  TransposeMicrokernelTester()
      .input_stride(20)
      .output_stride(12)
      .block_width(20)
      .block_height(12)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
}
1486 
// block_height 28, block_width 92 with both strides larger than the block
// dimensions, and both input (19) and output (15) element strides set.
TEST(X16_TRANSPOSEC__4X4_SCALAR_INT_2, bh_28_bw_92_ies_19_oes_15) {
  TransposeMicrokernelTester()
      .input_stride(97)
      .output_stride(34)
      .block_width(92)
      .block_height(28)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_scalar_int);
}
1499 
1500 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Guarded by TEST_REQUIRES_X86_SSE2. Single configuration: block_height 4,
// block_width 8, with both strides larger than the block dimensions.
TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_4_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(8)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x8_sse2);
}
1512 
// Guarded by TEST_REQUIRES_X86_SSE2. Sweeps every block_height in [1, 8]
// against every block_width in [1, 16], with input_stride = 3 * width and
// output_stride = 7 * height.
TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_1_8_bw_1_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x8_sse2);
    }
  }
}
1528 
// Guarded by TEST_REQUIRES_X86_SSE2. Single configuration: block_height 4,
// block_width 16, with strides equal to the block dimensions.
TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_4_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(4)
      .block_width(16)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x8_sse2);
}
1540 
// Guarded by TEST_REQUIRES_X86_SSE2. Width sweep at block_height 4 over
// block_width 9..15 (upper bound 16 is exclusive); input_stride tracks the
// width, output_stride is 8.
TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_4_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(4)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x8_sse2);
  }
}
1554 
// Guarded by TEST_REQUIRES_X86_SSE2. Width sweep at block_height 8 over
// block_width 9..15 (upper bound 16 is exclusive); input_stride tracks the
// width, output_stride is 8.
TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_8_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x8_sse2);
  }
}
1568 
// Guarded by TEST_REQUIRES_X86_SSE2. Single configuration: block_height 8,
// block_width 8; input_stride equals the width, output_stride (16) exceeds
// the height.
TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x8_sse2);
}
1580 
// Guarded by TEST_REQUIRES_X86_SSE2. Sweeps block_height over 5, 6, 7 (upper
// bound 8 is exclusive) with output_stride equal to the height. Note that
// block_width is fixed at 11 and input_stride at 25, even though the test
// name says bw_8.
TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_5_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(25)
        .output_stride(height)
        .block_width(11)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x8_sse2);
  }
}
1594 
// Guarded by TEST_REQUIRES_X86_SSE2. Sweeps block_height over 5, 6, 7 (upper
// bound 8 is exclusive) at a fixed block_width of 16; both strides equal the
// block dimensions.
TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_5_8_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(16)
        .output_stride(height)
        .block_width(16)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x8_sse2);
  }
}
1608 
// Guarded by TEST_REQUIRES_X86_SSE2. Sweeps block_height over 5, 6, 7 against
// block_width 9..15 (both upper bounds are exclusive); strides equal the
// block dimensions.
TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_5_8_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x8_sse2);
    }
  }
}
1624 
// Guarded by TEST_REQUIRES_X86_SSE2. block_height 4, block_width 8 with
// input_stride (16) larger than the width; output_stride equals the height.
TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_4_bw_8_is_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(4)
      .block_width(8)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x8_sse2);
}
1636 
// Guarded by TEST_REQUIRES_X86_SSE2. block_height 4, block_width 8 with
// output_stride (8) larger than the height; input_stride equals the width.
TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_4_bw_8_os_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(8)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x8_sse2);
}
1648 
// Guarded by TEST_REQUIRES_X86_SSE2. block_height 4, block_width 8 with both
// input_stride (16) and output_stride (8) larger than the block dimensions.
TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_4_bw_8_is_16_os_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(8)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x8_sse2);
}
1660 
// Guarded by TEST_REQUIRES_X86_SSE2. block_height 68, block_width 152 with
// strides equal to the block dimensions; additionally sets
// input_element_stride(13) alongside element_size(2).
TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_68_bw_152_ies_13) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(152)
      .output_stride(68)
      .block_width(152)
      .block_height(68)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x8_sse2);
}
1673 
// Guarded by TEST_REQUIRES_X86_SSE2. block_height 12, block_width 40 with
// strides equal to the block dimensions; additionally sets
// output_element_stride(13) alongside element_size(2).
TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_12_bw_40_oes_13) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(40)
      .output_stride(12)
      .block_width(40)
      .block_height(12)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x8_sse2);
}
1686 
// Guarded by TEST_REQUIRES_X86_SSE2. block_height 28, block_width 184 with
// both strides larger than the block dimensions, and both input (19) and
// output (15) element strides set.
TEST(X16_TRANSPOSEC__4X8_SSE2_2, bh_28_bw_184_ies_19_oes_15) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(189)
      .output_stride(34)
      .block_width(184)
      .block_height(28)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x8_sse2);
}
1700 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1701 
1702 
1703 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Guarded by TEST_REQUIRES_X86_SSE2. Single configuration: block_height 8,
// block_width 8, with both strides (16) larger than the block dimensions.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
}
1715 
// Guarded by TEST_REQUIRES_X86_SSE2. Sweeps every block_height in [1, 16]
// against every block_width in [1, 16], with input_stride = 3 * width and
// output_stride = 7 * height.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_1_16_bw_1_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
    }
  }
}
1731 
// Guarded by TEST_REQUIRES_X86_SSE2. Single configuration: block_height 8,
// block_width 16, with strides equal to the block dimensions.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_8_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(16)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
}
1743 
// Guarded by TEST_REQUIRES_X86_SSE2. Width sweep at block_height 8 over
// block_width 9..15 (upper bound 16 is exclusive); input_stride tracks the
// width, output_stride is 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_8_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(16)
        .block_width(width)
        .block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
  }
}
1757 
// Guarded by TEST_REQUIRES_X86_SSE2. Width sweep at block_height 16 over
// block_width 9..15 (upper bound 16 is exclusive); input_stride tracks the
// width, output_stride is 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_16_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(16)
        .block_width(width)
        .block_height(16)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
  }
}
1771 
// Guarded by TEST_REQUIRES_X86_SSE2. Single configuration: block_height 16,
// block_width 8; input_stride equals the width, output_stride (28) exceeds
// the height.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_16_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(28)
      .block_width(8)
      .block_height(16)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
}
1783 
// Guarded by TEST_REQUIRES_X86_SSE2. Sweeps block_height over 9..15 (upper
// bound 16 is exclusive) with output_stride equal to the height. Note that
// block_width is fixed at 11 and input_stride at 25, even though the test
// name says bw_8.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_9_16_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(25)
        .output_stride(height)
        .block_width(11)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
  }
}
1797 
// Guarded by TEST_REQUIRES_X86_SSE2. Sweeps block_height over 9..15 (upper
// bound 16 is exclusive) at a fixed block_width of 16; both strides equal the
// block dimensions.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_9_16_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(16)
        .output_stride(height)
        .block_width(16)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
  }
}
1811 
// Guarded by TEST_REQUIRES_X86_SSE2. Sweeps block_height and block_width
// independently over 9..15 (both upper bounds of 16 are exclusive); strides
// equal the block dimensions.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_9_16_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
    }
  }
}
1827 
// Guarded by TEST_REQUIRES_X86_SSE2. block_height 8, block_width 8 with
// input_stride (16) larger than the width; output_stride equals the height.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_8_bw_8_is_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
}
1839 
// Guarded by TEST_REQUIRES_X86_SSE2. block_height 8, block_width 8 with
// output_stride (16) larger than the height; input_stride equals the width.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_8_bw_8_os_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
}
1851 
// Guarded by TEST_REQUIRES_X86_SSE2. block_height 8, block_width 8 with both
// input_stride and output_stride (16) larger than the block dimensions.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
}
1863 
// Guarded by TEST_REQUIRES_X86_SSE2. block_height 136, block_width 152 with
// strides equal to the block dimensions; additionally sets
// input_element_stride(13) alongside element_size(2).
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_136_bw_152_ies_13) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(152)
      .output_stride(136)
      .block_width(152)
      .block_height(136)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
}
1876 
// Guarded by TEST_REQUIRES_X86_SSE2. block_height 24, block_width 40 with
// strides equal to the block dimensions; additionally sets
// output_element_stride(13) alongside element_size(2).
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_24_bw_40_oes_13) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(40)
      .output_stride(24)
      .block_width(40)
      .block_height(24)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
}
1889 
// Block height 56, block width 184; input stride 189 and output stride 62,
// with input_element_stride 19 and output_element_stride 15.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_SSE2_2, bh_56_bw_184_ies_19_oes_15) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(189)
    .output_stride(62)
    .block_width(184)
    .block_height(56)
    .element_size(2)
    .input_element_stride(19)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2);
}
1903 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1904 
1905 
1906 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single 8x8 block with both strides set to 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(16)
    .output_stride(16)
    .block_width(8)
    .block_height(8)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
}
1918 
// Sweeps every block height and width from 1 through 16 (inclusive).
// Input stride is 3x the width; output stride is 7x the height.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_1_16_bw_1_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  for (size_t bh = 1; bh <= 16; ++bh) {
    for (size_t bw = 1; bw <= 16; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw * 3)
        .output_stride(bh * 7)
        .block_width(bw)
        .block_height(bh)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
    }
  }
}
1934 
// Block height 8, block width 16; input stride 16 and output stride 8.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_8_bw_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(16)
    .output_stride(8)
    .block_width(16)
    .block_height(8)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
}
1946 
// Block height 8 with block widths 9..15 (the loop bound 16 is exclusive).
// Input stride tracks the width; output stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_8_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(16)
      .block_width(bw)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
  }
}
1960 
// Block height 16 with block widths 9..15 (the loop bound 16 is exclusive).
// Input stride tracks the width; output stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_16_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(16)
      .block_width(bw)
      .block_height(16)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
  }
}
1974 
// Block height 16, block width 8; input stride 8 and output stride 28.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_16_bw_8) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(28)
    .block_width(8)
    .block_height(16)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
}
1986 
// Block heights 9..15 (the loop bound 16 is exclusive); output stride tracks
// the height and input stride is fixed at 25.
// NOTE(review): the test name says bw_8 but block_width is 11 — confirm
// against the generator template before relying on the name.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_9_16_bw_8) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(25)
      .output_stride(bh)
      .block_width(11)
      .block_height(bh)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
  }
}
2000 
// Block heights 9..15 (the loop bound 16 is exclusive) with block width 16.
// Input stride is fixed at 16; output stride tracks the height.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_9_16_bw_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(bh)
      .block_width(16)
      .block_height(bh)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
  }
}
2014 
// Sweeps block heights and widths 9..15 (both loop bounds of 16 are
// exclusive); each stride equals the matching block dimension.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_9_16_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  for (size_t bh = 9; bh < 16; ++bh) {
    for (size_t bw = 9; bw < 16; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
    }
  }
}
2030 
// 8x8 block with input stride 16 and output stride 8.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_8_bw_8_is_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(16)
    .output_stride(8)
    .block_width(8)
    .block_height(8)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
}
2042 
// 8x8 block with input stride 8 and output stride 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_8_bw_8_os_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(16)
    .block_width(8)
    .block_height(8)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
}
2054 
// 8x8 block with both the input and the output stride set to 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(16)
    .output_stride(16)
    .block_width(8)
    .block_height(8)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
}
2066 
// Block height 136, block width 152, with input_element_stride set to 13.
// Each stride equals the matching block dimension.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_136_bw_152_ies_13) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(152)
    .output_stride(136)
    .block_width(152)
    .block_height(136)
    .element_size(2)
    .input_element_stride(13)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
}
2079 
// Block height 24, block width 40, with output_element_stride set to 13.
// Each stride equals the matching block dimension.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_24_bw_40_oes_13) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(40)
    .output_stride(24)
    .block_width(40)
    .block_height(24)
    .element_size(2)
    .output_element_stride(13)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
}
2092 
// Block height 56, block width 184; input stride 189 and output stride 62,
// with input_element_stride 19 and output_element_stride 15.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_SSE2_2, bh_56_bw_184_ies_19_oes_15) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(189)
    .output_stride(62)
    .block_width(184)
    .block_height(56)
    .element_size(2)
    .input_element_stride(19)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2);
}
2106 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2107 
2108 
2109 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single 8x8 block with both strides set to 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(16)
    .output_stride(16)
    .block_width(8)
    .block_height(8)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
}
2121 
// Sweeps every block height and width from 1 through 16 (inclusive).
// Input stride is 3x the width; output stride is 7x the height.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_1_16_bw_1_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  for (size_t bh = 1; bh <= 16; ++bh) {
    for (size_t bw = 1; bw <= 16; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw * 3)
        .output_stride(bh * 7)
        .block_width(bw)
        .block_height(bh)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
    }
  }
}
2137 
// Block height 8, block width 16; input stride 16 and output stride 8.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_8_bw_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(16)
    .output_stride(8)
    .block_width(16)
    .block_height(8)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
}
2149 
// Block height 8 with block widths 9..15 (the loop bound 16 is exclusive).
// Input stride tracks the width; output stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_8_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(16)
      .block_width(bw)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
  }
}
2163 
// Block height 16 with block widths 9..15 (the loop bound 16 is exclusive).
// Input stride tracks the width; output stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_16_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(16)
      .block_width(bw)
      .block_height(16)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
  }
}
2177 
// Block height 16, block width 8; input stride 8 and output stride 28.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_16_bw_8) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(28)
    .block_width(8)
    .block_height(16)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
}
2189 
// Block heights 9..15 (the loop bound 16 is exclusive); output stride tracks
// the height and input stride is fixed at 25.
// NOTE(review): the test name says bw_8 but block_width is 11 — confirm
// against the generator template before relying on the name.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_9_16_bw_8) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(25)
      .output_stride(bh)
      .block_width(11)
      .block_height(bh)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
  }
}
2203 
// Block heights 9..15 (the loop bound 16 is exclusive) with block width 16.
// Input stride is fixed at 16; output stride tracks the height.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_9_16_bw_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(bh)
      .block_width(16)
      .block_height(bh)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
  }
}
2217 
// Sweeps block heights and widths 9..15 (both loop bounds of 16 are
// exclusive); each stride equals the matching block dimension.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_9_16_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  for (size_t bh = 9; bh < 16; ++bh) {
    for (size_t bw = 9; bw < 16; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
    }
  }
}
2233 
// 8x8 block with input stride 16 and output stride 8.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_8_bw_8_is_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(16)
    .output_stride(8)
    .block_width(8)
    .block_height(8)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
}
2245 
// 8x8 block with input stride 8 and output stride 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_8_bw_8_os_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(16)
    .block_width(8)
    .block_height(8)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
}
2257 
// 8x8 block with both the input and the output stride set to 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(16)
    .output_stride(16)
    .block_width(8)
    .block_height(8)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
}
2269 
// Block height 136, block width 152, with input_element_stride set to 13.
// Each stride equals the matching block dimension.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_136_bw_152_ies_13) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(152)
    .output_stride(136)
    .block_width(152)
    .block_height(136)
    .element_size(2)
    .input_element_stride(13)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
}
2282 
// Block height 24, block width 40, with output_element_stride set to 13.
// Each stride equals the matching block dimension.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_24_bw_40_oes_13) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(40)
    .output_stride(24)
    .block_width(40)
    .block_height(24)
    .element_size(2)
    .output_element_stride(13)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
}
2295 
// Block height 56, block width 184; input stride 189 and output stride 62,
// with input_element_stride 19 and output_element_stride 15.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_SSE2_2, bh_56_bw_184_ies_19_oes_15) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(189)
    .output_stride(62)
    .block_width(184)
    .block_height(56)
    .element_size(2)
    .input_element_stride(19)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2);
}
2309 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2310 
2311 
2312 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single 8x8 block with both strides set to 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(16)
    .output_stride(16)
    .block_width(8)
    .block_height(8)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
}
2324 
// Sweeps every block height and width from 1 through 16 (inclusive).
// Input stride is 3x the width; output stride is 7x the height.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_1_16_bw_1_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  for (size_t bh = 1; bh <= 16; ++bh) {
    for (size_t bw = 1; bw <= 16; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw * 3)
        .output_stride(bh * 7)
        .block_width(bw)
        .block_height(bh)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
    }
  }
}
2340 
// Block height 8, block width 16; input stride 16 and output stride 8.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_8_bw_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(16)
    .output_stride(8)
    .block_width(16)
    .block_height(8)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
}
2352 
// Block height 8 with block widths 9..15 (the loop bound 16 is exclusive).
// Input stride tracks the width; output stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_8_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(16)
      .block_width(bw)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
  }
}
2366 
// Block height 16 with block widths 9..15 (the loop bound 16 is exclusive).
// Input stride tracks the width; output stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_16_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(16)
      .block_width(bw)
      .block_height(16)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
  }
}
2380 
// Block height 16, block width 8; input stride 8 and output stride 28.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_16_bw_8) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(28)
    .block_width(8)
    .block_height(16)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
}
2392 
// Block heights 9..15 (the loop bound 16 is exclusive); output stride tracks
// the height and input stride is fixed at 25.
// NOTE(review): the test name says bw_8 but block_width is 11 — confirm
// against the generator template before relying on the name.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_9_16_bw_8) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(25)
      .output_stride(bh)
      .block_width(11)
      .block_height(bh)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
  }
}
2406 
// Block heights 9..15 (the loop bound 16 is exclusive) with block width 16.
// Input stride is fixed at 16; output stride tracks the height.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_9_16_bw_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(bh)
      .block_width(16)
      .block_height(bh)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
  }
}
2420 
// Sweeps block heights and widths 9..15 (both loop bounds of 16 are
// exclusive); each stride equals the matching block dimension.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_9_16_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  for (size_t bh = 9; bh < 16; ++bh) {
    for (size_t bw = 9; bw < 16; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
    }
  }
}
2436 
// 8x8 block with input stride 16 and output stride 8.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_8_bw_8_is_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(16)
    .output_stride(8)
    .block_width(8)
    .block_height(8)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
}
2448 
// 8x8 block with input stride 8 and output stride 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_8_bw_8_os_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(16)
    .block_width(8)
    .block_height(8)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
}
2460 
// 8x8 block with both the input and the output stride set to 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(16)
    .output_stride(16)
    .block_width(8)
    .block_height(8)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
}
2472 
// Block height 136, block width 152, with input_element_stride set to 13.
// Each stride equals the matching block dimension.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_136_bw_152_ies_13) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(152)
    .output_stride(136)
    .block_width(152)
    .block_height(136)
    .element_size(2)
    .input_element_stride(13)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
}
2485 
// Block height 24, block width 40, with output_element_stride set to 13.
// Each stride equals the matching block dimension.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_24_bw_40_oes_13) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(40)
    .output_stride(24)
    .block_width(40)
    .block_height(24)
    .element_size(2)
    .output_element_stride(13)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
}
2498 
// Block height 56, block width 184; input stride 189 and output stride 62,
// with input_element_stride 19 and output_element_stride 15.
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_SSE2_2, bh_56_bw_184_ies_19_oes_15) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(189)
    .output_stride(62)
    .block_width(184)
    .block_height(56)
    .element_size(2)
    .input_element_stride(19)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2);
}
2512 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2513 
2514 
2515 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single 8x8 block with both strides set to 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(16)
    .output_stride(16)
    .block_width(8)
    .block_height(8)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
}
2527 
// Sweeps every block height and width from 1 through 16 (inclusive).
// Input stride is 3x the width; output stride is 7x the height.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_1_16_bw_1_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  for (size_t bh = 1; bh <= 16; ++bh) {
    for (size_t bw = 1; bw <= 16; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw * 3)
        .output_stride(bh * 7)
        .block_width(bw)
        .block_height(bh)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
    }
  }
}
2543 
// Block height 8, block width 16; input stride 16 and output stride 8.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_8_bw_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(16)
    .output_stride(8)
    .block_width(16)
    .block_height(8)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
}
2555 
// Block height 8 with block widths 9..15 (the loop bound 16 is exclusive).
// Input stride tracks the width; output stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_8_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(16)
      .block_width(bw)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
  }
}
2569 
// Block height 16 with block widths 9..15 (the loop bound 16 is exclusive).
// Input stride tracks the width; output stride is fixed at 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_16_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(16)
      .block_width(bw)
      .block_height(16)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
  }
}
2583 
// Block height 16, block width 8; input stride 8 and output stride 28.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_16_bw_8) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(28)
    .block_width(8)
    .block_height(16)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
}
2595 
// Block heights 9..15 (the loop bound 16 is exclusive); output stride tracks
// the height and input stride is fixed at 25.
// NOTE(review): the test name says bw_8 but block_width is 11 — confirm
// against the generator template before relying on the name.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_9_16_bw_8) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(25)
      .output_stride(bh)
      .block_width(11)
      .block_height(bh)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
  }
}
2609 
// Block heights 9..15 (the loop bound 16 is exclusive) with block width 16.
// Input stride is fixed at 16; output stride tracks the height.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_9_16_bw_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(bh)
      .block_width(16)
      .block_height(bh)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
  }
}
2623 
// Sweeps block heights and widths 9..15 (both loop bounds of 16 are
// exclusive); each stride equals the matching block dimension.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_9_16_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  for (size_t bh = 9; bh < 16; ++bh) {
    for (size_t bw = 9; bw < 16; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
    }
  }
}
2639 
// 8x8 block with input stride 16 and output stride 8.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_8_bw_8_is_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(16)
    .output_stride(8)
    .block_width(8)
    .block_height(8)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
}
2651 
// 8x8 block with input stride 8 and output stride 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_8_bw_8_os_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(16)
    .block_width(8)
    .block_height(8)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
}
2663 
// 8x8 block with both the input and the output stride set to 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(16)
    .output_stride(16)
    .block_width(8)
    .block_height(8)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
}
2675 
// Block height 136, block width 152, with input_element_stride set to 13.
// Each stride equals the matching block dimension.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_136_bw_152_ies_13) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(152)
    .output_stride(136)
    .block_width(152)
    .block_height(136)
    .element_size(2)
    .input_element_stride(13)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
}
2688 
// Block height 24, block width 40, with output_element_stride set to 13.
// Each stride equals the matching block dimension.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_24_bw_40_oes_13) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(40)
    .output_stride(24)
    .block_width(40)
    .block_height(24)
    .element_size(2)
    .output_element_stride(13)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
}
2701 
// Block height 56, block width 184; input stride 189 and output stride 62,
// with input_element_stride 19 and output_element_stride 15.
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_SSE2_2, bh_56_bw_184_ies_19_oes_15) {
  TEST_REQUIRES_X86_SSE2;  // presumably skips when SSE2 is unavailable
  TransposeMicrokernelTester()
    .input_stride(189)
    .output_stride(62)
    .block_width(184)
    .block_height(56)
    .element_size(2)
    .input_element_stride(19)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2);
}
2715 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2716 
2717 
2718 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single 8x8 block with both strides set to 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_8_bw_8) {
  TransposeMicrokernelTester()
    .input_stride(16)
    .output_stride(16)
    .block_width(8)
    .block_height(8)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
}
2729 
// Sweeps every block height and width from 1 through 16 (inclusive).
// Input stride is 3x the width; output stride is 7x the height.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_1_16_bw_1_16) {
  for (size_t bh = 1; bh <= 16; ++bh) {
    for (size_t bw = 1; bw <= 16; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw * 3)
        .output_stride(bh * 7)
        .block_width(bw)
        .block_height(bh)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
    }
  }
}
2744 
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_8_bw_16) {
  // Fixed 16-wide by 8-high block; input_stride 16, output_stride 8.
  const size_t width = 16;
  const size_t height = 8;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
}
2755 
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_8_bw_9_16) {
  // Sweeps block_width over 9..15 at block_height 8; input_stride follows
  // the width while output_stride stays at 16.
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(16)
        .block_width(width)
        .block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
  }
}
2768 
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_16_bw_9_16) {
  // Sweeps block_width over 9..15 at block_height 16; input_stride follows
  // the width while output_stride stays at 16.
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(16)
        .block_width(width)
        .block_height(16)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
  }
}
2781 
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_16_bw_8) {
  // Fixed 8-wide by 16-high block; input_stride 8, output_stride 28.
  const size_t width = 8;
  const size_t height = 16;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(28)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
}
2792 
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_9_16_bw_8) {
  // Sweeps block_height over 9..15; output_stride follows the height.
  // NOTE(review): the case name says bw_8, but block_width is 11 and
  // input_stride is 25; values are kept exactly as in the original.
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(25)
        .output_stride(height)
        .block_width(11)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
  }
}
2805 
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_9_16_bw_16) {
  // Sweeps block_height over 9..15 at block_width 16; output_stride follows
  // the height while input_stride stays at 16.
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(16)
        .output_stride(height)
        .block_width(16)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
  }
}
2818 
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_9_16_bw_9_16) {
  // Sweeps block_height and block_width over 9..15; input_stride equals the
  // width and output_stride equals the height.
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
    }
  }
}
2833 
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_8_bw_8_is_16) {
  // Fixed 8x8 block with input_stride 16 and output_stride 8.
  const size_t width = 8;
  const size_t height = 8;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
}
2844 
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_8_bw_8_os_16) {
  // Fixed 8x8 block with input_stride 8 and output_stride 16.
  const size_t width = 8;
  const size_t height = 8;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(16)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
}
2855 
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_8_bw_8_is_16_os_16) {
  // Fixed 8x8 block with input_stride 16 and output_stride 16.
  const size_t width = 8;
  const size_t height = 8;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
}
2866 
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_136_bw_152_ies_13) {
  // Fixed 152-wide by 136-high block with input_element_stride 13;
  // input_stride equals the width and output_stride equals the height.
  const size_t width = 152;
  const size_t height = 136;
  TransposeMicrokernelTester()
      .input_stride(152)
      .output_stride(136)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
}
2878 
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_24_bw_40_oes_13) {
  // Fixed 40-wide by 24-high block with output_element_stride 13;
  // input_stride equals the width and output_stride equals the height.
  const size_t width = 40;
  const size_t height = 24;
  TransposeMicrokernelTester()
      .input_stride(40)
      .output_stride(24)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
}
2890 
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_WASMSIMD_2, bh_56_bw_184_ies_19_oes_15) {
  // Fixed 184-wide by 56-high block with input_element_stride 19 and
  // output_element_stride 15; input_stride 189, output_stride 62.
  const size_t width = 184;
  const size_t height = 56;
  TransposeMicrokernelTester()
      .input_stride(189)
      .output_stride(62)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd);
}
2903 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2904 
2905 
2906 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_8_bw_8) {
  // Fixed 8x8 block; input_stride and output_stride are both 16.
  const size_t width = 8;
  const size_t height = 8;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
}
2917 
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_1_16_bw_1_16) {
  // Sweeps every block_height and block_width in 1..16 (inclusive).
  // input_stride is 3x the width and output_stride is 7x the height.
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
    }
  }
}
2932 
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_8_bw_16) {
  // Fixed 16-wide by 8-high block; input_stride 16, output_stride 8.
  const size_t width = 16;
  const size_t height = 8;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
}
2943 
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_8_bw_9_16) {
  // Sweeps block_width over 9..15 at block_height 8; input_stride follows
  // the width while output_stride stays at 16.
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(16)
        .block_width(width)
        .block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
  }
}
2956 
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_16_bw_9_16) {
  // Sweeps block_width over 9..15 at block_height 16; input_stride follows
  // the width while output_stride stays at 16.
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(16)
        .block_width(width)
        .block_height(16)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
  }
}
2969 
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_16_bw_8) {
  // Fixed 8-wide by 16-high block; input_stride 8, output_stride 28.
  const size_t width = 8;
  const size_t height = 16;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(28)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
}
2980 
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_9_16_bw_8) {
  // Sweeps block_height over 9..15; output_stride follows the height.
  // NOTE(review): the case name says bw_8, but block_width is 11 and
  // input_stride is 25; values are kept exactly as in the original.
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(25)
        .output_stride(height)
        .block_width(11)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
  }
}
2993 
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_9_16_bw_16) {
  // Sweeps block_height over 9..15 at block_width 16; output_stride follows
  // the height while input_stride stays at 16.
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(16)
        .output_stride(height)
        .block_width(16)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
  }
}
3006 
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_9_16_bw_9_16) {
  // Sweeps block_height and block_width over 9..15; input_stride equals the
  // width and output_stride equals the height.
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
    }
  }
}
3021 
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_8_bw_8_is_16) {
  // Fixed 8x8 block with input_stride 16 and output_stride 8.
  const size_t width = 8;
  const size_t height = 8;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
}
3032 
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_8_bw_8_os_16) {
  // Fixed 8x8 block with input_stride 8 and output_stride 16.
  const size_t width = 8;
  const size_t height = 8;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(16)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
}
3043 
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_8_bw_8_is_16_os_16) {
  // Fixed 8x8 block with input_stride 16 and output_stride 16.
  const size_t width = 8;
  const size_t height = 8;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
}
3054 
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_136_bw_152_ies_13) {
  // Fixed 152-wide by 136-high block with input_element_stride 13;
  // input_stride equals the width and output_stride equals the height.
  const size_t width = 152;
  const size_t height = 136;
  TransposeMicrokernelTester()
      .input_stride(152)
      .output_stride(136)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
}
3066 
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_24_bw_40_oes_13) {
  // Fixed 40-wide by 24-high block with output_element_stride 13;
  // input_stride equals the width and output_stride equals the height.
  const size_t width = 40;
  const size_t height = 24;
  TransposeMicrokernelTester()
      .input_stride(40)
      .output_stride(24)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
}
3078 
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_WASMSIMD_2, bh_56_bw_184_ies_19_oes_15) {
  // Fixed 184-wide by 56-high block with input_element_stride 19 and
  // output_element_stride 15; input_stride 189, output_stride 62.
  const size_t width = 184;
  const size_t height = 56;
  TransposeMicrokernelTester()
      .input_stride(189)
      .output_stride(62)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd);
}
3091 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3092 
3093 
3094 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_8_bw_8) {
  // Fixed 8x8 block; input_stride and output_stride are both 16.
  const size_t width = 8;
  const size_t height = 8;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
}
3105 
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_1_16_bw_1_16) {
  // Sweeps every block_height and block_width in 1..16 (inclusive).
  // input_stride is 3x the width and output_stride is 7x the height.
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
    }
  }
}
3120 
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_8_bw_16) {
  // Fixed 16-wide by 8-high block; input_stride 16, output_stride 8.
  const size_t width = 16;
  const size_t height = 8;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
}
3131 
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_8_bw_9_16) {
  // Sweeps block_width over 9..15 at block_height 8; input_stride follows
  // the width while output_stride stays at 16.
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(16)
        .block_width(width)
        .block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
  }
}
3144 
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_16_bw_9_16) {
  // Sweeps block_width over 9..15 at block_height 16; input_stride follows
  // the width while output_stride stays at 16.
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(16)
        .block_width(width)
        .block_height(16)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
  }
}
3157 
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_16_bw_8) {
  // Fixed 8-wide by 16-high block; input_stride 8, output_stride 28.
  const size_t width = 8;
  const size_t height = 16;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(28)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
}
3168 
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_9_16_bw_8) {
  // Sweeps block_height over 9..15; output_stride follows the height.
  // NOTE(review): the case name says bw_8, but block_width is 11 and
  // input_stride is 25; values are kept exactly as in the original.
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(25)
        .output_stride(height)
        .block_width(11)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
  }
}
3181 
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_9_16_bw_16) {
  // Sweeps block_height over 9..15 at block_width 16; output_stride follows
  // the height while input_stride stays at 16.
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(16)
        .output_stride(height)
        .block_width(16)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
  }
}
3194 
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_9_16_bw_9_16) {
  // Sweeps block_height and block_width over 9..15; input_stride equals the
  // width and output_stride equals the height.
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
    }
  }
}
3209 
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_8_bw_8_is_16) {
  // Fixed 8x8 block with input_stride 16 and output_stride 8.
  const size_t width = 8;
  const size_t height = 8;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
}
3220 
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_8_bw_8_os_16) {
  // Fixed 8x8 block with input_stride 8 and output_stride 16.
  const size_t width = 8;
  const size_t height = 8;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(16)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
}
3231 
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_8_bw_8_is_16_os_16) {
  // Fixed 8x8 block with input_stride 16 and output_stride 16.
  const size_t width = 8;
  const size_t height = 8;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
}
3242 
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_136_bw_152_ies_13) {
  // Fixed 152-wide by 136-high block with input_element_stride 13;
  // input_stride equals the width and output_stride equals the height.
  const size_t width = 152;
  const size_t height = 136;
  TransposeMicrokernelTester()
      .input_stride(152)
      .output_stride(136)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
}
3254 
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_24_bw_40_oes_13) {
  // Fixed 40-wide by 24-high block with output_element_stride 13;
  // input_stride equals the width and output_stride equals the height.
  const size_t width = 40;
  const size_t height = 24;
  TransposeMicrokernelTester()
      .input_stride(40)
      .output_stride(24)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
}
3266 
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_WASMSIMD_2, bh_56_bw_184_ies_19_oes_15) {
  // Fixed 184-wide by 56-high block with input_element_stride 19 and
  // output_element_stride 15; input_stride 189, output_stride 62.
  const size_t width = 184;
  const size_t height = 56;
  TransposeMicrokernelTester()
      .input_stride(189)
      .output_stride(62)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd);
}
3279 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3280 
3281 
3282 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_8_bw_8) {
  // Fixed 8x8 block; input_stride and output_stride are both 16.
  const size_t width = 8;
  const size_t height = 8;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
}
3293 
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_1_16_bw_1_16) {
  // Sweeps every block_height and block_width in 1..16 (inclusive).
  // input_stride is 3x the width and output_stride is 7x the height.
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
    }
  }
}
3308 
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_8_bw_16) {
  // Fixed 16-wide by 8-high block; input_stride 16, output_stride 8.
  const size_t width = 16;
  const size_t height = 8;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
}
3319 
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_8_bw_9_16) {
  // Sweeps block_width over 9..15 at block_height 8; input_stride follows
  // the width while output_stride stays at 16.
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(16)
        .block_width(width)
        .block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
  }
}
3332 
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_16_bw_9_16) {
  // Sweeps block_width over 9..15 at block_height 16; input_stride follows
  // the width while output_stride stays at 16.
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(16)
        .block_width(width)
        .block_height(16)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
  }
}
3345 
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_16_bw_8) {
  // Fixed 8-wide by 16-high block; input_stride 8, output_stride 28.
  const size_t width = 8;
  const size_t height = 16;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(28)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
}
3356 
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_9_16_bw_8) {
  // Sweeps block_height over 9..15; output_stride follows the height.
  // NOTE(review): the case name says bw_8, but block_width is 11 and
  // input_stride is 25; values are kept exactly as in the original.
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(25)
        .output_stride(height)
        .block_width(11)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
  }
}
3369 
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_9_16_bw_16) {
  // Sweeps block_height over 9..15 at block_width 16; output_stride follows
  // the height while input_stride stays at 16.
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(16)
        .output_stride(height)
        .block_width(16)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
  }
}
3382 
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_9_16_bw_9_16) {
  // Sweeps block_height and block_width over 9..15; input_stride equals the
  // width and output_stride equals the height.
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
    }
  }
}
3397 
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_8_bw_8_is_16) {
  // Fixed 8x8 block with input_stride 16 and output_stride 8.
  const size_t width = 8;
  const size_t height = 8;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
}
3408 
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_8_bw_8_os_16) {
  // Fixed 8x8 block with input_stride 8 and output_stride 16.
  const size_t width = 8;
  const size_t height = 8;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(16)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
}
3419 
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_8_bw_8_is_16_os_16) {
  // Fixed 8x8 block with input_stride 16 and output_stride 16.
  const size_t width = 8;
  const size_t height = 8;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
}
3430 
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_136_bw_152_ies_13) {
  // Fixed 152-wide by 136-high block with input_element_stride 13;
  // input_stride equals the width and output_stride equals the height.
  const size_t width = 152;
  const size_t height = 136;
  TransposeMicrokernelTester()
      .input_stride(152)
      .output_stride(136)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
}
3442 
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_24_bw_40_oes_13) {
  // Fixed 40-wide by 24-high block with output_element_stride 13;
  // input_stride equals the width and output_stride equals the height.
  const size_t width = 40;
  const size_t height = 24;
  TransposeMicrokernelTester()
      .input_stride(40)
      .output_stride(24)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
}
3454 
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_WASMSIMD_2, bh_56_bw_184_ies_19_oes_15) {
  // Fixed 184-wide by 56-high block with input_element_stride 19 and
  // output_element_stride 15; input_stride 189, output_stride 62.
  const size_t width = 184;
  const size_t height = 56;
  TransposeMicrokernelTester()
      .input_stride(189)
      .output_stride(62)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd);
}
3467 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3468 
3469 
3470 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  // Fixed 4x4 block; input_stride and output_stride are both 8.
  const size_t width = 4;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
}
3482 
TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps every block_height and block_width in 1..8 (inclusive).
  // input_stride is 3x the width and output_stride is 7x the height.
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
    }
  }
}
3498 
TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  // Fixed 8-wide by 4-high block; input_stride 8, output_stride 4.
  const size_t width = 8;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
}
3510 
TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps block_width over 5..7 at block_height 4; input_stride follows
  // the width while output_stride stays at 8.
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(4)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
  }
}
3524 
TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps block_width over 5..7 at block_height 8; input_stride follows
  // the width while output_stride stays at 8.
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
  }
}
3538 
TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  // Fixed 4-wide by 8-high block; input_stride 4, output_stride 16.
  const size_t width = 4;
  const size_t height = 8;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(16)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
}
3550 
TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps block_height over 5..7; output_stride follows the height.
  // NOTE(review): the case name says bw_4, but block_width is 7 and
  // input_stride is 21; values are kept exactly as in the original.
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(21)
        .output_stride(height)
        .block_width(7)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
  }
}
3564 
TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps block_height over 5..7 at block_width 8; output_stride follows
  // the height while input_stride stays at 8.
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(8)
        .output_stride(height)
        .block_width(8)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
  }
}
3578 
// Sweeps every combination of block height and block width in {5, 6, 7}.
// input_stride equals the width and output_stride equals the height.
TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
    }
  }
}
3594 
// One fixed case: a 4x4 block where only input_stride (8) is larger than
// the block dimension; output_stride is 4.
TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 4;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
}
3606 
// One fixed case: a 4x4 block where only output_stride (8) is larger than
// the block dimension; input_stride is 4.
TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 4;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(8)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
}
3618 
// One fixed case: a 4x4 block with input_stride and output_stride both 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 4;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
}
3630 
// Larger case: a block 76 wide and 68 high with strides equal to the block
// dimensions, element_size 2 and input_element_stride 13.
TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_68_bw_76_ies_13) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 76;
  const size_t height = 68;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
}
3643 
// Case with a block 20 wide and 12 high, strides equal to the block
// dimensions, element_size 2 and output_element_stride 13.
TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_12_bw_20_oes_13) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 20;
  const size_t height = 12;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
}
3656 
// Case with a block 92 wide and 28 high, input_stride 97 and
// output_stride 34, element_size 2, input_element_stride 19 and
// output_element_stride 15.
TEST(X16_TRANSPOSEC__4X4_MULTI_DEC_ZIP_NEON_2, bh_28_bw_92_ies_19_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 92;
  const size_t height = 28;
  TransposeMicrokernelTester()
      .input_stride(97)
      .output_stride(34)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon);
}
3670 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
3671 
3672 
3673 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// One fixed case: a block 4 wide and 4 high, with input_stride and
// output_stride both set to 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 4;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
}
3685 
// Sweeps every block height 1..8 against every block width 1..8.
// input_stride is 3x the width and output_stride is 7x the height.
TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
    }
  }
}
3701 
// One fixed case: a block 8 wide and 4 high, with input_stride 8 and
// output_stride 4.
TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 8;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
}
3713 
// Sweeps block widths 5, 6 and 7 at a fixed block height of 4.
// input_stride follows the width; output_stride stays at 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t height = 4;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
  }
}
3727 
// Sweeps block widths 5, 6 and 7 at a fixed block height of 8.
// input_stride follows the width; output_stride stays at 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t height = 8;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
  }
}
3741 
// One fixed case: a block 4 wide and 8 high, with input_stride 4 and
// output_stride 16.
TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 4;
  const size_t height = 8;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(16)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
}
3753 
// Sweeps block heights 5, 6 and 7; output_stride follows the height.
// NOTE(review): the test name says bw_4, but the code uses block_width 7
// with input_stride 21. Kept exactly as written -- confirm against the
// generator template before changing either the name or the values.
TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 7;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(21)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
  }
}
3767 
// Sweeps block heights 5, 6 and 7 at a fixed block width of 8.
// input_stride equals the width; output_stride follows the height.
TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 8;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
  }
}
3781 
// Sweeps every combination of block height and block width in {5, 6, 7}.
// input_stride equals the width and output_stride equals the height.
TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
    }
  }
}
3797 
// One fixed case: a 4x4 block where only input_stride (8) is larger than
// the block dimension; output_stride is 4.
TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 4;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
}
3809 
// One fixed case: a 4x4 block where only output_stride (8) is larger than
// the block dimension; input_stride is 4.
TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 4;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(8)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
}
3821 
// One fixed case: a 4x4 block with input_stride and output_stride both 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 4;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
}
3833 
// Larger case: a block 76 wide and 68 high with strides equal to the block
// dimensions, element_size 2 and input_element_stride 13.
TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_68_bw_76_ies_13) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 76;
  const size_t height = 68;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
}
3846 
// Case with a block 20 wide and 12 high, strides equal to the block
// dimensions, element_size 2 and output_element_stride 13.
TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_12_bw_20_oes_13) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 20;
  const size_t height = 12;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
}
3859 
// Case with a block 92 wide and 28 high, input_stride 97 and
// output_stride 34, element_size 2, input_element_stride 19 and
// output_element_stride 15.
TEST(X16_TRANSPOSEC__4X4_MULTI_MOV_ZIP_NEON_2, bh_28_bw_92_ies_19_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 92;
  const size_t height = 28;
  TransposeMicrokernelTester()
      .input_stride(97)
      .output_stride(34)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon);
}
3873 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
3874 
3875 
3876 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// One fixed case: a block 4 wide and 4 high, with input_stride and
// output_stride both set to 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 4;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
}
3888 
// Sweeps every block height 1..8 against every block width 1..8.
// input_stride is 3x the width and output_stride is 7x the height.
TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
    }
  }
}
3904 
// One fixed case: a block 8 wide and 4 high, with input_stride 8 and
// output_stride 4.
TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 8;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
}
3916 
// Sweeps block widths 5, 6 and 7 at a fixed block height of 4.
// input_stride follows the width; output_stride stays at 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t height = 4;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
  }
}
3930 
// Sweeps block widths 5, 6 and 7 at a fixed block height of 8.
// input_stride follows the width; output_stride stays at 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t height = 8;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
  }
}
3944 
// One fixed case: a block 4 wide and 8 high, with input_stride 4 and
// output_stride 16.
TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 4;
  const size_t height = 8;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(16)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
}
3956 
// Sweeps block heights 5, 6 and 7; output_stride follows the height.
// NOTE(review): the test name says bw_4, but the code uses block_width 7
// with input_stride 21. Kept exactly as written -- confirm against the
// generator template before changing either the name or the values.
TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 7;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(21)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
  }
}
3970 
// Sweeps block heights 5, 6 and 7 at a fixed block width of 8.
// input_stride equals the width; output_stride follows the height.
TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 8;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
  }
}
3984 
// Sweeps every combination of block height and block width in {5, 6, 7}.
// input_stride equals the width and output_stride equals the height.
TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
    }
  }
}
4000 
// One fixed case: a 4x4 block where only input_stride (8) is larger than
// the block dimension; output_stride is 4.
TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 4;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
}
4012 
// One fixed case: a 4x4 block where only output_stride (8) is larger than
// the block dimension; input_stride is 4.
TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 4;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(8)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
}
4024 
// One fixed case: a 4x4 block with input_stride and output_stride both 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 4;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
}
4036 
// Larger case: a block 76 wide and 68 high with strides equal to the block
// dimensions, element_size 2 and input_element_stride 13.
TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_68_bw_76_ies_13) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 76;
  const size_t height = 68;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
}
4049 
// Case with a block 20 wide and 12 high, strides equal to the block
// dimensions, element_size 2 and output_element_stride 13.
TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_12_bw_20_oes_13) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 20;
  const size_t height = 12;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
}
4062 
// Case with a block 92 wide and 28 high, input_stride 97 and
// output_stride 34, element_size 2, input_element_stride 19 and
// output_element_stride 15.
TEST(X16_TRANSPOSEC__4X4_MULTI_MULTI_ZIP_NEON_2, bh_28_bw_92_ies_19_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 92;
  const size_t height = 28;
  TransposeMicrokernelTester()
      .input_stride(97)
      .output_stride(34)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon);
}
4076 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
4077 
4078 
4079 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// One fixed case: a block 4 wide and 4 high, with input_stride and
// output_stride both set to 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 4;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
}
4091 
// Sweeps every block height 1..8 against every block width 1..8.
// input_stride is 3x the width and output_stride is 7x the height.
TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
    }
  }
}
4107 
// One fixed case: a block 8 wide and 4 high, with input_stride 8 and
// output_stride 4.
TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 8;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
}
4119 
// Sweeps block widths 5, 6 and 7 at a fixed block height of 4.
// input_stride follows the width; output_stride stays at 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t height = 4;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
  }
}
4133 
// Sweeps block widths 5, 6 and 7 at a fixed block height of 8.
// input_stride follows the width; output_stride stays at 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t height = 8;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
  }
}
4147 
// One fixed case: a block 4 wide and 8 high, with input_stride 4 and
// output_stride 16.
TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 4;
  const size_t height = 8;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(16)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
}
4159 
// Sweeps block heights 5, 6 and 7; output_stride follows the height.
// NOTE(review): the test name says bw_4, but the code uses block_width 7
// with input_stride 21. Kept exactly as written -- confirm against the
// generator template before changing either the name or the values.
TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 7;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(21)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
  }
}
4173 
// Sweeps block heights 5, 6 and 7 at a fixed block width of 8.
// input_stride equals the width; output_stride follows the height.
TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 8;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
  }
}
4187 
// Sweeps every combination of block height and block width in {5, 6, 7}.
// input_stride equals the width and output_stride equals the height.
TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
    }
  }
}
4203 
// One fixed case: a 4x4 block where only input_stride (8) is larger than
// the block dimension; output_stride is 4.
TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 4;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
}
4215 
// One fixed case: a 4x4 block where only output_stride (8) is larger than
// the block dimension; input_stride is 4.
TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 4;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(8)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
}
4227 
// One fixed case: a 4x4 block with input_stride and output_stride both 8.
TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 4;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
}
4239 
// Larger case: a block 76 wide and 68 high with strides equal to the block
// dimensions, element_size 2 and input_element_stride 13.
TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_68_bw_76_ies_13) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 76;
  const size_t height = 68;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
}
4252 
// Case with a block 20 wide and 12 high, strides equal to the block
// dimensions, element_size 2 and output_element_stride 13.
TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_12_bw_20_oes_13) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 20;
  const size_t height = 12;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
}
4265 
// Case with a block 92 wide and 28 high, input_stride 97 and
// output_stride 34, element_size 2, input_element_stride 19 and
// output_element_stride 15.
TEST(X16_TRANSPOSEC__4X4_MULTI_SWITCH_ZIP_NEON_2, bh_28_bw_92_ies_19_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 92;
  const size_t height = 28;
  TransposeMicrokernelTester()
      .input_stride(97)
      .output_stride(34)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon);
}
4279 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
4280 
4281 
4282 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// One fixed case: a block 4 wide and 4 high, with input_stride and
// output_stride both set to 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 4;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
4294 
// Sweeps every block height 1..8 against every block width 1..8.
// input_stride is 3x the width and output_stride is 7x the height.
TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon);
    }
  }
}
4310 
// One fixed case: a block 8 wide and 4 high, with input_stride 8 and
// output_stride 4.
TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 8;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
4322 
// Sweeps block widths 5, 6 and 7 at a fixed block height of 4.
// input_stride follows the width; output_stride stays at 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t height = 4;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon);
  }
}
4336 
// Sweeps block widths 5, 6 and 7 at a fixed block height of 8.
// input_stride follows the width; output_stride stays at 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t height = 8;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon);
  }
}
4350 
// One fixed case: a block 4 wide and 8 high, with input_stride 4 and
// output_stride 16.
TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 4;
  const size_t height = 8;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(16)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
4362 
// Sweeps block heights 5, 6 and 7; output_stride follows the height.
// NOTE(review): the test name says bw_4, but the code uses block_width 7
// with input_stride 21. Kept exactly as written -- confirm against the
// generator template before changing either the name or the values.
TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 7;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(21)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon);
  }
}
4376 
// Sweeps block heights 5, 6 and 7 at a fixed block width of 8.
// input_stride equals the width; output_stride follows the height.
TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 8;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon);
  }
}
4390 
// Sweeps every combination of block height and block width in {5, 6, 7}.
// input_stride equals the width and output_stride equals the height.
TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon);
    }
  }
}
4406 
// One fixed case: a 4x4 block where only input_stride (8) is larger than
// the block dimension; output_stride is 4.
TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 4;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon);
}
4418 
// 4x4 block through the 4x4 reuse_dec_zip NEON kernel with an input stride of 4
// and an output stride of 8 (larger than the block height).
TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(ukernel);
}
4430 
// 4x4 block through the 4x4 reuse_dec_zip NEON kernel with both the input and
// the output stride set to 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(ukernel);
}
4442 
// Block of width 76 and height 68 through the 4x4 reuse_dec_zip NEON kernel with
// an input element stride of 13; strides match the block dimensions.
TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_68_bw_76_ies_13) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(76)
      .output_stride(68)
      .block_width(76)
      .block_height(68)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(ukernel);
}
4455 
// Block of width 20 and height 12 through the 4x4 reuse_dec_zip NEON kernel with
// an output element stride of 13; strides match the block dimensions.
TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_12_bw_20_oes_13) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(20)
      .output_stride(12)
      .block_width(20)
      .block_height(12)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(ukernel);
}
4468 
// Block of width 92 and height 28 through the 4x4 reuse_dec_zip NEON kernel with
// input stride 97, output stride 34, input element stride 19 and output
// element stride 15.
TEST(X16_TRANSPOSEC__4X4_REUSE_DEC_ZIP_NEON_2, bh_28_bw_92_ies_19_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(97)
      .output_stride(34)
      .block_width(92)
      .block_height(28)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(ukernel);
}
4482 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
4483 
4484 
4485 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 4x4 block through the 4x4 reuse_mov_zip NEON kernel with input and output
// strides of 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(ukernel);
}
4497 
// Runs every block height and width from 1 through 8 (inclusive) through the
// 4x4 reuse_mov_zip NEON kernel. The input stride is three times the width and
// the output stride is seven times the height.
TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon;
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(ukernel);
    }
  }
}
4513 
// Block of width 8 and height 4 through the 4x4 reuse_mov_zip NEON kernel;
// input stride 8, output stride 4.
TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(8)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(ukernel);
}
4525 
// Sweeps block widths 5, 6 and 7 at a fixed block height of 4 through the 4x4
// reuse_mov_zip NEON kernel; the input stride tracks the width, output stride 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(4)
        .element_size(2)
        .iterations(1)
        .Test(ukernel);
  }
}
4539 
// Sweeps block widths 5, 6 and 7 at a fixed block height of 8 through the 4x4
// reuse_mov_zip NEON kernel; the input stride tracks the width, output stride 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(ukernel);
  }
}
4553 
// Block of width 4 and height 8 through the 4x4 reuse_mov_zip NEON kernel;
// input stride 4, output stride 16.
TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(16)
      .block_width(4)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(ukernel);
}
4565 
// Sweeps block heights 5, 6 and 7 through the 4x4 reuse_mov_zip NEON kernel;
// the output stride tracks the height. Note that, despite "bw_4" in the test
// name, the block width configured here is 7 with an input stride of 21.
TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(21)
        .output_stride(height)
        .block_width(7)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(ukernel);
  }
}
4579 
// Sweeps block heights 5, 6 and 7 at a fixed block width of 8 (input stride 8)
// through the 4x4 reuse_mov_zip NEON kernel; the output stride tracks the height.
TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(8)
        .output_stride(height)
        .block_width(8)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(ukernel);
  }
}
4593 
// Runs every combination of block height and block width in {5, 6, 7} through
// the 4x4 reuse_mov_zip NEON kernel. The input stride equals the width and the
// output stride equals the height.
TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon;
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(ukernel);
    }
  }
}
4609 
// 4x4 block through the 4x4 reuse_mov_zip NEON kernel with an input stride of 8
// (larger than the block width) and an output stride of 4.
TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(ukernel);
}
4621 
// 4x4 block through the 4x4 reuse_mov_zip NEON kernel with an input stride of 4
// and an output stride of 8 (larger than the block height).
TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(ukernel);
}
4633 
// 4x4 block through the 4x4 reuse_mov_zip NEON kernel with both the input and
// the output stride set to 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(ukernel);
}
4645 
// Block of width 76 and height 68 through the 4x4 reuse_mov_zip NEON kernel with
// an input element stride of 13; strides match the block dimensions.
TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_68_bw_76_ies_13) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(76)
      .output_stride(68)
      .block_width(76)
      .block_height(68)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(ukernel);
}
4658 
// Block of width 20 and height 12 through the 4x4 reuse_mov_zip NEON kernel with
// an output element stride of 13; strides match the block dimensions.
TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_12_bw_20_oes_13) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(20)
      .output_stride(12)
      .block_width(20)
      .block_height(12)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(ukernel);
}
4671 
// Block of width 92 and height 28 through the 4x4 reuse_mov_zip NEON kernel with
// input stride 97, output stride 34, input element stride 19 and output
// element stride 15.
TEST(X16_TRANSPOSEC__4X4_REUSE_MOV_ZIP_NEON_2, bh_28_bw_92_ies_19_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(97)
      .output_stride(34)
      .block_width(92)
      .block_height(28)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(ukernel);
}
4685 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
4686 
4687 
4688 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 4x4 block through the 4x4 reuse_multi_zip NEON kernel with input and output
// strides of 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(ukernel);
}
4700 
// Runs every block height and width from 1 through 8 (inclusive) through the
// 4x4 reuse_multi_zip NEON kernel. The input stride is three times the width
// and the output stride is seven times the height.
TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon;
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(ukernel);
    }
  }
}
4716 
// Block of width 8 and height 4 through the 4x4 reuse_multi_zip NEON kernel;
// input stride 8, output stride 4.
TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(8)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(ukernel);
}
4728 
// Sweeps block widths 5, 6 and 7 at a fixed block height of 4 through the 4x4
// reuse_multi_zip NEON kernel; the input stride tracks the width, output stride 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(4)
        .element_size(2)
        .iterations(1)
        .Test(ukernel);
  }
}
4742 
// Sweeps block widths 5, 6 and 7 at a fixed block height of 8 through the 4x4
// reuse_multi_zip NEON kernel; the input stride tracks the width, output stride 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(ukernel);
  }
}
4756 
// Block of width 4 and height 8 through the 4x4 reuse_multi_zip NEON kernel;
// input stride 4, output stride 16.
TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(16)
      .block_width(4)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(ukernel);
}
4768 
// Sweeps block heights 5, 6 and 7 through the 4x4 reuse_multi_zip NEON kernel;
// the output stride tracks the height. Note that, despite "bw_4" in the test
// name, the block width configured here is 7 with an input stride of 21.
TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(21)
        .output_stride(height)
        .block_width(7)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(ukernel);
  }
}
4782 
// Sweeps block heights 5, 6 and 7 at a fixed block width of 8 (input stride 8)
// through the 4x4 reuse_multi_zip NEON kernel; the output stride tracks the
// height.
TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(8)
        .output_stride(height)
        .block_width(8)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(ukernel);
  }
}
4796 
// Runs every combination of block height and block width in {5, 6, 7} through
// the 4x4 reuse_multi_zip NEON kernel. The input stride equals the width and
// the output stride equals the height.
TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon;
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(ukernel);
    }
  }
}
4812 
// 4x4 block through the 4x4 reuse_multi_zip NEON kernel with an input stride of
// 8 (larger than the block width) and an output stride of 4.
TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(ukernel);
}
4824 
// 4x4 block through the 4x4 reuse_multi_zip NEON kernel with an input stride of
// 4 and an output stride of 8 (larger than the block height).
TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(ukernel);
}
4836 
// 4x4 block through the 4x4 reuse_multi_zip NEON kernel with both the input and
// the output stride set to 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(ukernel);
}
4848 
// Block of width 76 and height 68 through the 4x4 reuse_multi_zip NEON kernel
// with an input element stride of 13; strides match the block dimensions.
TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_68_bw_76_ies_13) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(76)
      .output_stride(68)
      .block_width(76)
      .block_height(68)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(ukernel);
}
4861 
// Block of width 20 and height 12 through the 4x4 reuse_multi_zip NEON kernel
// with an output element stride of 13; strides match the block dimensions.
TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_12_bw_20_oes_13) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(20)
      .output_stride(12)
      .block_width(20)
      .block_height(12)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(ukernel);
}
4874 
// Block of width 92 and height 28 through the 4x4 reuse_multi_zip NEON kernel
// with input stride 97, output stride 34, input element stride 19 and output
// element stride 15.
TEST(X16_TRANSPOSEC__4X4_REUSE_MULTI_ZIP_NEON_2, bh_28_bw_92_ies_19_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(97)
      .output_stride(34)
      .block_width(92)
      .block_height(28)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(ukernel);
}
4888 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
4889 
4890 
4891 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 4x4 block through the 4x4 reuse_switch_zip NEON kernel with input and output
// strides of 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(ukernel);
}
4903 
// Runs every block height and width from 1 through 8 (inclusive) through the
// 4x4 reuse_switch_zip NEON kernel. The input stride is three times the width
// and the output stride is seven times the height.
TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon;
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(ukernel);
    }
  }
}
4919 
// Block of width 8 and height 4 through the 4x4 reuse_switch_zip NEON kernel;
// input stride 8, output stride 4.
TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(8)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(ukernel);
}
4931 
// Sweeps block widths 5, 6 and 7 at a fixed block height of 4 through the 4x4
// reuse_switch_zip NEON kernel; the input stride tracks the width, output
// stride 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(4)
        .element_size(2)
        .iterations(1)
        .Test(ukernel);
  }
}
4945 
// Sweeps block widths 5, 6 and 7 at a fixed block height of 8 through the 4x4
// reuse_switch_zip NEON kernel; the input stride tracks the width, output
// stride 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(ukernel);
  }
}
4959 
// Block of width 4 and height 8 through the 4x4 reuse_switch_zip NEON kernel;
// input stride 4, output stride 16.
TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(16)
      .block_width(4)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(ukernel);
}
4971 
// Sweeps block heights 5, 6 and 7 through the 4x4 reuse_switch_zip NEON kernel;
// the output stride tracks the height. Note that, despite "bw_4" in the test
// name, the block width configured here is 7 with an input stride of 21.
TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(21)
        .output_stride(height)
        .block_width(7)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(ukernel);
  }
}
4985 
// Sweeps block heights 5, 6 and 7 at a fixed block width of 8 (input stride 8)
// through the 4x4 reuse_switch_zip NEON kernel; the output stride tracks the
// height.
TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(8)
        .output_stride(height)
        .block_width(8)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(ukernel);
  }
}
4999 
// Runs every combination of block height and block width in {5, 6, 7} through
// the 4x4 reuse_switch_zip NEON kernel. The input stride equals the width and
// the output stride equals the height.
TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon;
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(ukernel);
    }
  }
}
5015 
// 4x4 block through the 4x4 reuse_switch_zip NEON kernel with an input stride
// of 8 (larger than the block width) and an output stride of 4.
TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(4)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(ukernel);
}
5027 
// 4x4 block through the 4x4 reuse_switch_zip NEON kernel with an input stride
// of 4 and an output stride of 8 (larger than the block height).
TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(ukernel);
}
5039 
// 4x4 block through the 4x4 reuse_switch_zip NEON kernel with both the input
// and the output stride set to 8.
TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(8)
      .block_width(4)
      .block_height(4)
      .element_size(2)
      .iterations(1)
      .Test(ukernel);
}
5051 
// Block of width 76 and height 68 through the 4x4 reuse_switch_zip NEON kernel
// with an input element stride of 13; strides match the block dimensions.
TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_68_bw_76_ies_13) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(76)
      .output_stride(68)
      .block_width(76)
      .block_height(68)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(ukernel);
}
5064 
// Block of width 20 and height 12 through the 4x4 reuse_switch_zip NEON kernel
// with an output element stride of 13; strides match the block dimensions.
TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_12_bw_20_oes_13) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(20)
      .output_stride(12)
      .block_width(20)
      .block_height(12)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(ukernel);
}
5077 
// Block of width 92 and height 28 through the 4x4 reuse_switch_zip NEON kernel
// with input stride 97, output stride 34, input element stride 19 and output
// element stride 15.
TEST(X16_TRANSPOSEC__4X4_REUSE_SWITCH_ZIP_NEON_2, bh_28_bw_92_ies_19_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(97)
      .output_stride(34)
      .block_width(92)
      .block_height(28)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(ukernel);
}
5091 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
5092 
5093 
5094 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 8x8 block through the 8x8 multi_dec_zip NEON kernel with input and output
// strides of 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(ukernel);
}
5106 
// Runs every block height and width from 1 through 16 (inclusive) through the
// 8x8 multi_dec_zip NEON kernel. The input stride is three times the width and
// the output stride is seven times the height.
TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon;
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(ukernel);
    }
  }
}
5122 
// Block of width 16 and height 8 through the 8x8 multi_dec_zip NEON kernel;
// input stride 16, output stride 8.
TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(16)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(ukernel);
}
5134 
// Sweeps block widths 9 through 15 at a fixed block height of 8 through the 8x8
// multi_dec_zip NEON kernel; the input stride tracks the width, output
// stride 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(16)
        .block_width(width)
        .block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(ukernel);
  }
}
5148 
// Sweeps block widths 9 through 15 at a fixed block height of 16 through the
// 8x8 multi_dec_zip NEON kernel; the input stride tracks the width, output
// stride 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(16)
        .block_width(width)
        .block_height(16)
        .element_size(2)
        .iterations(1)
        .Test(ukernel);
  }
}
5162 
// Block of width 8 and height 16 through the 8x8 multi_dec_zip NEON kernel;
// input stride 8, output stride 28.
TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(28)
      .block_width(8)
      .block_height(16)
      .element_size(2)
      .iterations(1)
      .Test(ukernel);
}
5174 
// Sweeps block heights 9 through 15 through the 8x8 multi_dec_zip NEON kernel;
// the output stride tracks the height. Note that, despite "bw_8" in the test
// name, the block width configured here is 11 with an input stride of 25.
TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(25)
        .output_stride(height)
        .block_width(11)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(ukernel);
  }
}
5188 
// Sweeps block heights 9 through 15 at a fixed block width of 16 (input stride
// 16) through the 8x8 multi_dec_zip NEON kernel; the output stride tracks the
// height.
TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(16)
        .output_stride(height)
        .block_width(16)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(ukernel);
  }
}
5202 
// Runs every combination of block height and block width from 9 through 15
// through the 8x8 multi_dec_zip NEON kernel. The input stride equals the width
// and the output stride equals the height.
TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon;
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(ukernel);
    }
  }
}
5218 
// 8x8 block through the 8x8 multi_dec_zip NEON kernel with an input stride of
// 16 (larger than the block width) and an output stride of 8.
TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(ukernel);
}
5230 
// 8x8 block through the 8x8 multi_dec_zip NEON kernel with an input stride of 8
// and an output stride of 16 (larger than the block height).
TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(ukernel);
}
5242 
// 8x8 block through the 8x8 multi_dec_zip NEON kernel with both the input and
// the output stride set to 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(ukernel);
}
5254 
// Block of width 152 and height 136 through the 8x8 multi_dec_zip NEON kernel
// with an input element stride of 13; strides match the block dimensions.
TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_136_bw_152_ies_13) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon;
  TransposeMicrokernelTester()
      .input_stride(152)
      .output_stride(136)
      .block_width(152)
      .block_height(136)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(ukernel);
}
5267 
// Block 40 wide by 24 high with strides equal to the block dimensions and
// output_element_stride 13, which is larger than element_size 2.
TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_24_bw_40_oes_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(40)
      .output_stride(24)
      .block_width(40)
      .block_height(24)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon);
}
5280 
// Block 184 wide by 56 high; input_stride 189 and output_stride 62 each exceed
// the matching block dimension, and both element strides (19 in, 15 out) are
// larger than element_size 2.
TEST(X16_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_2, bh_56_bw_184_ies_19_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(189)
      .output_stride(62)
      .block_width(184)
      .block_height(56)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon);
}
5294 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
5295 
5296 
5297 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 8x8 block of 2-byte elements; input_stride and output_stride are
// both 16, larger than the block itself.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
}
5309 
// Sweeps every block_height and block_width in [1, 16] (both bounds
// inclusive), using input_stride = 3 * width and output_stride = 7 * height.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
    }
  }
}
5325 
// Block 16 wide by 8 high; input_stride 16 and output_stride 8 match the
// block width and height respectively.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(16)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
}
5337 
// block_height fixed at 8 with output_stride 16; block_width (also used as
// input_stride) is swept over 9..15 - the `< 16` bound excludes 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(16)
        .block_width(width)
        .block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
  }
}
5351 
// block_height fixed at 16 with output_stride 16; block_width (also used as
// input_stride) is swept over 9..15 - the `< 16` bound excludes 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(16)
        .block_width(width)
        .block_height(16)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
  }
}
5365 
// Block 8 wide by 16 high; input_stride 8 equals the block width while
// output_stride 28 is larger than the block height.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(28)
      .block_width(8)
      .block_height(16)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
}
5377 
// block_height (also used as output_stride) is swept over 9..15 - the `< 16`
// bound excludes 16.
// NOTE(review): the test name says bw_8, but block_width is 11 and
// input_stride is 25 - confirm this matches the generator's intent.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(25)
        .output_stride(height)
        .block_width(11)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
  }
}
5391 
// block_width fixed at 16 with input_stride 16; block_height (also used as
// output_stride) is swept over 9..15 - the `< 16` bound excludes 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(16)
        .output_stride(height)
        .block_width(16)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
  }
}
5405 
// Sweeps block_height and block_width independently over 9..15 (the `< 16`
// bounds exclude 16); each stride equals the matching block dimension.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
    }
  }
}
5421 
// Single 8x8 block of 2-byte elements with input_stride 16 (twice the block
// width) and output_stride 8 (equal to the block height).
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
}
5433 
// Single 8x8 block of 2-byte elements with input_stride 8 (equal to the block
// width) and output_stride 16 (twice the block height).
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
}
5445 
// Single 8x8 block of 2-byte elements where input_stride and output_stride
// are both 16, i.e. twice the block extent.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
}
5457 
// Block 152 wide by 136 high with strides equal to the block dimensions and
// input_element_stride 13, which is larger than element_size 2.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_136_bw_152_ies_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(152)
      .output_stride(136)
      .block_width(152)
      .block_height(136)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
}
5470 
// Block 40 wide by 24 high with strides equal to the block dimensions and
// output_element_stride 13, which is larger than element_size 2.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_24_bw_40_oes_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(40)
      .output_stride(24)
      .block_width(40)
      .block_height(24)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
}
5483 
// Block 184 wide by 56 high; input_stride 189 and output_stride 62 each exceed
// the matching block dimension, and both element strides (19 in, 15 out) are
// larger than element_size 2.
TEST(X16_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_2, bh_56_bw_184_ies_19_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(189)
      .output_stride(62)
      .block_width(184)
      .block_height(56)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon);
}
5497 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
5498 
5499 
5500 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 8x8 block of 2-byte elements; input_stride and output_stride are
// both 16, larger than the block itself.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
}
5512 
// Sweeps every block_height and block_width in [1, 16] (both bounds
// inclusive), using input_stride = 3 * width and output_stride = 7 * height.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
    }
  }
}
5528 
// Block 16 wide by 8 high; input_stride 16 and output_stride 8 match the
// block width and height respectively.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(16)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
}
5540 
// block_height fixed at 8 with output_stride 16; block_width (also used as
// input_stride) is swept over 9..15 - the `< 16` bound excludes 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(16)
        .block_width(width)
        .block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
  }
}
5554 
// block_height fixed at 16 with output_stride 16; block_width (also used as
// input_stride) is swept over 9..15 - the `< 16` bound excludes 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(16)
        .block_width(width)
        .block_height(16)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
  }
}
5568 
// Block 8 wide by 16 high; input_stride 8 equals the block width while
// output_stride 28 is larger than the block height.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(28)
      .block_width(8)
      .block_height(16)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
}
5580 
// block_height (also used as output_stride) is swept over 9..15 - the `< 16`
// bound excludes 16.
// NOTE(review): the test name says bw_8, but block_width is 11 and
// input_stride is 25 - confirm this matches the generator's intent.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(25)
        .output_stride(height)
        .block_width(11)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
  }
}
5594 
// block_width fixed at 16 with input_stride 16; block_height (also used as
// output_stride) is swept over 9..15 - the `< 16` bound excludes 16.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(16)
        .output_stride(height)
        .block_width(16)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
  }
}
5608 
// Sweeps block_height and block_width independently over 9..15 (the `< 16`
// bounds exclude 16); each stride equals the matching block dimension.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
    }
  }
}
5624 
// Single 8x8 block of 2-byte elements with input_stride 16 (twice the block
// width) and output_stride 8 (equal to the block height).
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
}
5636 
// Single 8x8 block of 2-byte elements with input_stride 8 (equal to the block
// width) and output_stride 16 (twice the block height).
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
}
5648 
// Single 8x8 block of 2-byte elements where input_stride and output_stride
// are both 16, i.e. twice the block extent.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
}
5660 
// Block 152 wide by 136 high with strides equal to the block dimensions and
// input_element_stride 13, which is larger than element_size 2.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_136_bw_152_ies_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(152)
      .output_stride(136)
      .block_width(152)
      .block_height(136)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
}
5673 
// Block 40 wide by 24 high with strides equal to the block dimensions and
// output_element_stride 13, which is larger than element_size 2.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_24_bw_40_oes_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(40)
      .output_stride(24)
      .block_width(40)
      .block_height(24)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
}
5686 
// Block 184 wide by 56 high; input_stride 189 and output_stride 62 each exceed
// the matching block dimension, and both element strides (19 in, 15 out) are
// larger than element_size 2.
TEST(X16_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_2, bh_56_bw_184_ies_19_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(189)
      .output_stride(62)
      .block_width(184)
      .block_height(56)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon);
}
5700 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
5701 
5702 
5703 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 8x8 block of 2-byte elements; input_stride and output_stride are
// both 16, larger than the block itself.
TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
5715 
// Sweeps every block_height and block_width in [1, 16] (both bounds
// inclusive), using input_stride = 3 * width and output_stride = 7 * height.
TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
    }
  }
}
5731 
// Block 16 wide by 8 high; input_stride 16 and output_stride 8 match the
// block width and height respectively.
TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(16)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
5743 
// block_height fixed at 8 with output_stride 16; block_width (also used as
// input_stride) is swept over 9..15 - the `< 16` bound excludes 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(16)
        .block_width(width)
        .block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
  }
}
5757 
// block_height fixed at 16 with output_stride 16; block_width (also used as
// input_stride) is swept over 9..15 - the `< 16` bound excludes 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(16)
        .block_width(width)
        .block_height(16)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
  }
}
5771 
// Block 8 wide by 16 high; input_stride 8 equals the block width while
// output_stride 28 is larger than the block height.
TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(28)
      .block_width(8)
      .block_height(16)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
5783 
// block_height (also used as output_stride) is swept over 9..15 - the `< 16`
// bound excludes 16.
// NOTE(review): the test name says bw_8, but block_width is 11 and
// input_stride is 25 - confirm this matches the generator's intent.
TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(25)
        .output_stride(height)
        .block_width(11)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
  }
}
5797 
// block_width fixed at 16 with input_stride 16; block_height (also used as
// output_stride) is swept over 9..15 - the `< 16` bound excludes 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(16)
        .output_stride(height)
        .block_width(16)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
  }
}
5811 
// Sweeps block_height and block_width independently over 9..15 (the `< 16`
// bounds exclude 16); each stride equals the matching block dimension.
TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
    }
  }
}
5827 
// Single 8x8 block of 2-byte elements with input_stride 16 (twice the block
// width) and output_stride 8 (equal to the block height).
TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
5839 
// Single 8x8 block of 2-byte elements with input_stride 8 (equal to the block
// width) and output_stride 16 (twice the block height).
TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
5851 
// Single 8x8 block of 2-byte elements where input_stride and output_stride
// are both 16, i.e. twice the block extent.
TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
5863 
// Block 152 wide by 136 high with strides equal to the block dimensions and
// input_element_stride 13, which is larger than element_size 2.
TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_136_bw_152_ies_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(152)
      .output_stride(136)
      .block_width(152)
      .block_height(136)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
5876 
// Block 40 wide by 24 high with strides equal to the block dimensions and
// output_element_stride 13, which is larger than element_size 2.
TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_24_bw_40_oes_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(40)
      .output_stride(24)
      .block_width(40)
      .block_height(24)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
5889 
// Block 184 wide by 56 high; input_stride 189 and output_stride 62 each exceed
// the matching block dimension, and both element strides (19 in, 15 out) are
// larger than element_size 2.
TEST(X16_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_2, bh_56_bw_184_ies_19_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(189)
      .output_stride(62)
      .block_width(184)
      .block_height(56)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
5903 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
5904 
5905 
5906 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 8x8 block of 2-byte elements; input_stride and output_stride are
// both 16, larger than the block itself.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
5918 
// Sweeps every block_height and block_width in [1, 16] (both bounds
// inclusive), using input_stride = 3 * width and output_stride = 7 * height.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
    }
  }
}
5934 
// Block 16 wide by 8 high; input_stride 16 and output_stride 8 match the
// block width and height respectively.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(16)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
5946 
// block_height fixed at 8 with output_stride 16; block_width (also used as
// input_stride) is swept over 9..15 - the `< 16` bound excludes 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(16)
        .block_width(width)
        .block_height(8)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
  }
}
5960 
// block_height fixed at 16 with output_stride 16; block_width (also used as
// input_stride) is swept over 9..15 - the `< 16` bound excludes 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(16)
        .block_width(width)
        .block_height(16)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
  }
}
5974 
// Block 8 wide by 16 high; input_stride 8 equals the block width while
// output_stride 28 is larger than the block height.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(28)
      .block_width(8)
      .block_height(16)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
5986 
// block_height (also used as output_stride) is swept over 9..15 - the `< 16`
// bound excludes 16.
// NOTE(review): the test name says bw_8, but block_width is 11 and
// input_stride is 25 - confirm this matches the generator's intent.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(25)
        .output_stride(height)
        .block_width(11)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
  }
}
6000 
// block_width fixed at 16 with input_stride 16; block_height (also used as
// output_stride) is swept over 9..15 - the `< 16` bound excludes 16.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(16)
        .output_stride(height)
        .block_width(16)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
  }
}
6014 
// Sweeps block_height and block_width independently over 9..15 (the `< 16`
// bounds exclude 16); each stride equals the matching block dimension.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(2)
          .iterations(1)
          .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
    }
  }
}
6030 
// Single 8x8 block of 2-byte elements with input_stride 16 (twice the block
// width) and output_stride 8 (equal to the block height).
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
6042 
// Single 8x8 block of 2-byte elements with input_stride 8 (equal to the block
// width) and output_stride 16 (twice the block height).
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
6054 
// Single 8x8 block of 2-byte elements where input_stride and output_stride
// are both 16, i.e. twice the block extent.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
6066 
// Block 152 wide by 136 high with strides equal to the block dimensions and
// input_element_stride 13, which is larger than element_size 2.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_136_bw_152_ies_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(152)
      .output_stride(136)
      .block_width(152)
      .block_height(136)
      .element_size(2)
      .input_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
6079 
// Block 40 wide by 24 high with strides equal to the block dimensions and
// output_element_stride 13, which is larger than element_size 2.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_24_bw_40_oes_13) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(40)
      .output_stride(24)
      .block_width(40)
      .block_height(24)
      .element_size(2)
      .output_element_stride(13)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
6092 
// Block 184 wide by 56 high; input_stride 189 and output_stride 62 each exceed
// the matching block dimension, and both element strides (19 in, 15 out) are
// larger than element_size 2.
TEST(X16_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_2, bh_56_bw_184_ies_19_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(189)
      .output_stride(62)
      .block_width(184)
      .block_height(56)
      .element_size(2)
      .input_element_stride(19)
      .output_element_stride(15)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
6106 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
6107 
6108 
6109 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  // Fixed case: block_width and block_height are both 8, and both strides
  // are 16.
  const size_t block_dim = 8;
  const size_t stride = 16;
  TransposeMicrokernelTester()
    .input_stride(stride)
    .output_stride(stride)
    .block_width(block_dim)
    .block_height(block_dim)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
6121 
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  // Exhaustive sweep of block_height 1..16 (outer loop) and block_width 1..16
  // (inner loop). input_stride is 3x the width, output_stride 7x the height.
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width * 3)
        .output_stride(height * 7)
        .block_width(width)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
    }
  }
}
6137 
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  // Fixed case: 16-wide by 8-high block. input_stride equals the width and
  // output_stride equals the height.
  const size_t width = 16;
  const size_t height = 8;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
6149 
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  // Sweep block_width over 9..15 (the bound of 16 is exclusive) at
  // block_height 8. input_stride follows the width; output_stride stays 16.
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(16)
      .block_width(width)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
  }
}
6163 
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  // Sweep block_width over 9..15 (the bound of 16 is exclusive) at
  // block_height 16. input_stride follows the width; output_stride stays 16.
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(16)
      .block_width(width)
      .block_height(16)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
  }
}
6177 
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  // Fixed case: 8-wide by 16-high block. input_stride equals the width;
  // output_stride is 28, larger than the block height.
  const size_t width = 8;
  const size_t height = 16;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(28)
    .block_width(width)
    .block_height(height)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
6189 
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  // Sweep block_height over 9..15 (the bound of 16 is exclusive);
  // output_stride follows the height.
  // NOTE(review): the test name says bw_8, but block_width is 11 with
  // input_stride 25 -- confirm this is what the generator template intends.
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
      .input_stride(25)
      .output_stride(height)
      .block_width(11)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
  }
}
6203 
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  // Sweep block_height over 9..15 (the bound of 16 is exclusive) at
  // block_width 16. output_stride follows the height; input_stride stays 16.
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(height)
      .block_width(16)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
  }
}
6217 
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  // Sweep block_height (outer loop) and block_width (inner loop) over 9..15;
  // both bounds of 16 are exclusive. Each stride equals its block dimension.
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
    }
  }
}
6233 
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  // Fixed case: block_width and block_height are both 8. Only input_stride
  // is enlarged (16); output_stride equals the block dimension.
  const size_t block_dim = 8;
  TransposeMicrokernelTester()
    .input_stride(16)
    .output_stride(block_dim)
    .block_width(block_dim)
    .block_height(block_dim)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
6245 
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  // Fixed case: block_width and block_height are both 8. Only output_stride
  // is enlarged (16); input_stride equals the block dimension.
  const size_t block_dim = 8;
  TransposeMicrokernelTester()
    .input_stride(block_dim)
    .output_stride(16)
    .block_width(block_dim)
    .block_height(block_dim)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
6257 
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  // Fixed case: block_width and block_height are both 8, and both the input
  // and the output stride are 16.
  const size_t block_dim = 8;
  const size_t stride = 16;
  TransposeMicrokernelTester()
    .input_stride(stride)
    .output_stride(stride)
    .block_width(block_dim)
    .block_height(block_dim)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
6269 
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_136_bw_152_ies_13) {
  TEST_REQUIRES_ARM_NEON;
  // Large 152-wide by 136-high block. input_stride equals the width,
  // output_stride equals the height, and input_element_stride is set to 13.
  const size_t width = 152;
  const size_t height = 136;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .element_size(2)
    .input_element_stride(13)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
6282 
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_24_bw_40_oes_13) {
  TEST_REQUIRES_ARM_NEON;
  // 40-wide by 24-high block. input_stride equals the width, output_stride
  // equals the height, and output_element_stride is set to 13.
  const size_t width = 40;
  const size_t height = 24;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .element_size(2)
    .output_element_stride(13)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
6295 
TEST(X16_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_2, bh_56_bw_184_ies_19_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  // 184-wide by 56-high block with both element strides overridden.
  // input_stride is width + 5 (189) and output_stride is height + 6 (62).
  const size_t width = 184;
  const size_t height = 56;
  TransposeMicrokernelTester()
    .input_stride(width + 5)
    .output_stride(height + 6)
    .block_width(width)
    .block_height(height)
    .element_size(2)
    .input_element_stride(19)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
6309 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
6310 
6311 
6312 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  // Fixed case: block_width and block_height are both 8, and both strides
  // are 16.
  const size_t block_dim = 8;
  const size_t stride = 16;
  TransposeMicrokernelTester()
    .input_stride(stride)
    .output_stride(stride)
    .block_width(block_dim)
    .block_height(block_dim)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
6324 
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  // Exhaustive sweep of block_height 1..16 (outer loop) and block_width 1..16
  // (inner loop). input_stride is 3x the width, output_stride 7x the height.
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width * 3)
        .output_stride(height * 7)
        .block_width(width)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
    }
  }
}
6340 
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  // Fixed case: 16-wide by 8-high block. input_stride equals the width and
  // output_stride equals the height.
  const size_t width = 16;
  const size_t height = 8;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
6352 
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  // Sweep block_width over 9..15 (the bound of 16 is exclusive) at
  // block_height 8. input_stride follows the width; output_stride stays 16.
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(16)
      .block_width(width)
      .block_height(8)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
  }
}
6366 
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  // Sweep block_width over 9..15 (the bound of 16 is exclusive) at
  // block_height 16. input_stride follows the width; output_stride stays 16.
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(16)
      .block_width(width)
      .block_height(16)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
  }
}
6380 
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  // Fixed case: 8-wide by 16-high block. input_stride equals the width;
  // output_stride is 28, larger than the block height.
  const size_t width = 8;
  const size_t height = 16;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(28)
    .block_width(width)
    .block_height(height)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
6392 
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  // Sweep block_height over 9..15 (the bound of 16 is exclusive);
  // output_stride follows the height.
  // NOTE(review): the test name says bw_8, but block_width is 11 with
  // input_stride 25 -- confirm this is what the generator template intends.
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
      .input_stride(25)
      .output_stride(height)
      .block_width(11)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
  }
}
6406 
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  // Sweep block_height over 9..15 (the bound of 16 is exclusive) at
  // block_width 16. output_stride follows the height; input_stride stays 16.
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(height)
      .block_width(16)
      .block_height(height)
      .element_size(2)
      .iterations(1)
      .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
  }
}
6420 
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  // Sweep block_height (outer loop) and block_width (inner loop) over 9..15;
  // both bounds of 16 are exclusive. Each stride equals its block dimension.
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(2)
        .iterations(1)
        .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
    }
  }
}
6436 
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  // Fixed case: block_width and block_height are both 8. Only input_stride
  // is enlarged (16); output_stride equals the block dimension.
  const size_t block_dim = 8;
  TransposeMicrokernelTester()
    .input_stride(16)
    .output_stride(block_dim)
    .block_width(block_dim)
    .block_height(block_dim)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
6448 
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  // Fixed case: block_width and block_height are both 8. Only output_stride
  // is enlarged (16); input_stride equals the block dimension.
  const size_t block_dim = 8;
  TransposeMicrokernelTester()
    .input_stride(block_dim)
    .output_stride(16)
    .block_width(block_dim)
    .block_height(block_dim)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
6460 
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  // Fixed case: block_width and block_height are both 8, and both the input
  // and the output stride are 16.
  const size_t block_dim = 8;
  const size_t stride = 16;
  TransposeMicrokernelTester()
    .input_stride(stride)
    .output_stride(stride)
    .block_width(block_dim)
    .block_height(block_dim)
    .element_size(2)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
6472 
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_136_bw_152_ies_13) {
  TEST_REQUIRES_ARM_NEON;
  // Large 152-wide by 136-high block. input_stride equals the width,
  // output_stride equals the height, and input_element_stride is set to 13.
  const size_t width = 152;
  const size_t height = 136;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .element_size(2)
    .input_element_stride(13)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
6485 
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_24_bw_40_oes_13) {
  TEST_REQUIRES_ARM_NEON;
  // 40-wide by 24-high block. input_stride equals the width, output_stride
  // equals the height, and output_element_stride is set to 13.
  const size_t width = 40;
  const size_t height = 24;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .element_size(2)
    .output_element_stride(13)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
6498 
TEST(X16_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_2, bh_56_bw_184_ies_19_oes_15) {
  TEST_REQUIRES_ARM_NEON;
  // 184-wide by 56-high block with both element strides overridden.
  // input_stride is width + 5 (189) and output_stride is height + 6 (62).
  const size_t width = 184;
  const size_t height = 56;
  TransposeMicrokernelTester()
    .input_stride(width + 5)
    .output_stride(height + 6)
    .block_width(width)
    .block_height(height)
    .element_size(2)
    .input_element_stride(19)
    .output_element_stride(15)
    .iterations(1)
    .Test(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
6512 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
6513