xref: /aosp_15_r20/external/XNNPACK/test/f32-prelu.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/f32-prelu.yaml
//   Generator: tools/generate-prelu-test.py
9 
10 
11 #include <gtest/gtest.h>
12 
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15 
16 #include <xnnpack/prelu.h>
17 #include "prelu-microkernel-tester.h"
18 
19 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_f32_prelu_ukernel__neon_1x4. Every test first evaluates
  // TEST_REQUIRES_ARM_NEON and then drives the kernel through
  // PReLUMicrokernelTester with the row/channel configuration listed below.

  // One row, exactly 4 channels.
  TEST(F32_PRELU__NEON_1X4, channels_eq_4) {
    TEST_REQUIRES_ARM_NEON;
    PReLUMicrokernelTester().rows(1).channels(4)
        .Test(xnn_f32_prelu_ukernel__neon_1x4);
  }

  // One row, channel counts 8 up to (not including) 40 in steps of 4.
  TEST(F32_PRELU__NEON_1X4, channels_div_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t c = 8; c < 40; c += 4) {
      PReLUMicrokernelTester().rows(1).channels(c)
          .Test(xnn_f32_prelu_ukernel__neon_1x4);
    }
  }

  // One row, channel counts below 4.
  TEST(F32_PRELU__NEON_1X4, channels_lt_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t c = 1; c < 4; ++c) {
      PReLUMicrokernelTester().rows(1).channels(c)
          .Test(xnn_f32_prelu_ukernel__neon_1x4);
    }
  }

  // One row, channel counts strictly between 4 and 8.
  TEST(F32_PRELU__NEON_1X4, channels_gt_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t c = 5; c < 8; ++c) {
      PReLUMicrokernelTester().rows(1).channels(c)
          .Test(xnn_f32_prelu_ukernel__neon_1x4);
    }
  }

  // Row counts greater than 1 (rows 2 and 3), channels 1..20 in steps of 3.
  // The upper bound has to exceed the start value of 2: with `rows < 2` the
  // loop body is never entered and the test passes without checking anything.
  // NOTE: the file header marks this file as generated by
  // tools/generate-prelu-test.py; the generator needs the same bound fix or a
  // regeneration will bring the empty loop back.
  TEST(F32_PRELU__NEON_1X4, rows_gt_1) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 2; r < 4; ++r) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .Test(xnn_f32_prelu_ukernel__neon_1x4);
      }
    }
  }

  // Rows 1..3, channels 1..20 in steps of 3, input stride 23, iterations 1.
  TEST(F32_PRELU__NEON_1X4, input_stride) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 1; r <= 3; ++r) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .input_stride(23).iterations(1)
            .Test(xnn_f32_prelu_ukernel__neon_1x4);
      }
    }
  }

  // Rows 1..3, channels 1..20 in steps of 3, output stride 23, iterations 1.
  TEST(F32_PRELU__NEON_1X4, output_stride) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 1; r <= 3; ++r) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .output_stride(23).iterations(1)
            .Test(xnn_f32_prelu_ukernel__neon_1x4);
      }
    }
  }

  // Rows 1..3, channels 1..20 in steps of 3, with inplace(true), iterations 1.
  TEST(F32_PRELU__NEON_1X4, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 1; r <= 3; ++r) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .inplace(true).iterations(1)
            .Test(xnn_f32_prelu_ukernel__neon_1x4);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
113 
114 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_f32_prelu_ukernel__neon_1x8. Every test first evaluates
  // TEST_REQUIRES_ARM_NEON and then drives the kernel through
  // PReLUMicrokernelTester with the row/channel configuration listed below.

  // One row, exactly 8 channels.
  TEST(F32_PRELU__NEON_1X8, channels_eq_8) {
    TEST_REQUIRES_ARM_NEON;
    PReLUMicrokernelTester().rows(1).channels(8)
        .Test(xnn_f32_prelu_ukernel__neon_1x8);
  }

  // One row, channel counts 16 up to (not including) 80 in steps of 8.
  TEST(F32_PRELU__NEON_1X8, channels_div_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t c = 16; c < 80; c += 8) {
      PReLUMicrokernelTester().rows(1).channels(c)
          .Test(xnn_f32_prelu_ukernel__neon_1x8);
    }
  }

  // One row, channel counts below 8.
  TEST(F32_PRELU__NEON_1X8, channels_lt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t c = 1; c < 8; ++c) {
      PReLUMicrokernelTester().rows(1).channels(c)
          .Test(xnn_f32_prelu_ukernel__neon_1x8);
    }
  }

  // One row, channel counts strictly between 8 and 16.
  TEST(F32_PRELU__NEON_1X8, channels_gt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t c = 9; c < 16; ++c) {
      PReLUMicrokernelTester().rows(1).channels(c)
          .Test(xnn_f32_prelu_ukernel__neon_1x8);
    }
  }

  // Row counts greater than 1 (rows 2 and 3), channels 1..40 in steps of 7.
  // The upper bound has to exceed the start value of 2: with `rows < 2` the
  // loop body is never entered and the test passes without checking anything.
  // NOTE: the file header marks this file as generated by
  // tools/generate-prelu-test.py; the generator needs the same bound fix or a
  // regeneration will bring the empty loop back.
  TEST(F32_PRELU__NEON_1X8, rows_gt_1) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 2; r < 4; ++r) {
      for (size_t c = 1; c <= 40; c += 7) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .Test(xnn_f32_prelu_ukernel__neon_1x8);
      }
    }
  }

  // Rows 1..3, channels 1..40 in steps of 7, input stride 43, iterations 1.
  TEST(F32_PRELU__NEON_1X8, input_stride) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 1; r <= 3; ++r) {
      for (size_t c = 1; c <= 40; c += 7) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .input_stride(43).iterations(1)
            .Test(xnn_f32_prelu_ukernel__neon_1x8);
      }
    }
  }

  // Rows 1..3, channels 1..40 in steps of 7, output stride 43, iterations 1.
  TEST(F32_PRELU__NEON_1X8, output_stride) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 1; r <= 3; ++r) {
      for (size_t c = 1; c <= 40; c += 7) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .output_stride(43).iterations(1)
            .Test(xnn_f32_prelu_ukernel__neon_1x8);
      }
    }
  }

  // Rows 1..3, channels 1..40 in steps of 7, with inplace(true), iterations 1.
  TEST(F32_PRELU__NEON_1X8, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 1; r <= 3; ++r) {
      for (size_t c = 1; c <= 40; c += 7) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .inplace(true).iterations(1)
            .Test(xnn_f32_prelu_ukernel__neon_1x8);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
208 
209 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_f32_prelu_ukernel__neon_1x16. Every test first evaluates
  // TEST_REQUIRES_ARM_NEON and then drives the kernel through
  // PReLUMicrokernelTester with the row/channel configuration listed below.

  // One row, exactly 16 channels.
  TEST(F32_PRELU__NEON_1X16, channels_eq_16) {
    TEST_REQUIRES_ARM_NEON;
    PReLUMicrokernelTester().rows(1).channels(16)
        .Test(xnn_f32_prelu_ukernel__neon_1x16);
  }

  // One row, channel counts 32 up to (not including) 160 in steps of 16.
  TEST(F32_PRELU__NEON_1X16, channels_div_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t c = 32; c < 160; c += 16) {
      PReLUMicrokernelTester().rows(1).channels(c)
          .Test(xnn_f32_prelu_ukernel__neon_1x16);
    }
  }

  // One row, channel counts below 16.
  TEST(F32_PRELU__NEON_1X16, channels_lt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t c = 1; c < 16; ++c) {
      PReLUMicrokernelTester().rows(1).channels(c)
          .Test(xnn_f32_prelu_ukernel__neon_1x16);
    }
  }

  // One row, channel counts strictly between 16 and 32.
  TEST(F32_PRELU__NEON_1X16, channels_gt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t c = 17; c < 32; ++c) {
      PReLUMicrokernelTester().rows(1).channels(c)
          .Test(xnn_f32_prelu_ukernel__neon_1x16);
    }
  }

  // Row counts greater than 1 (rows 2 and 3), channels 1..80 in steps of 15.
  // The upper bound has to exceed the start value of 2: with `rows < 2` the
  // loop body is never entered and the test passes without checking anything.
  // NOTE: the file header marks this file as generated by
  // tools/generate-prelu-test.py; the generator needs the same bound fix or a
  // regeneration will bring the empty loop back.
  TEST(F32_PRELU__NEON_1X16, rows_gt_1) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 2; r < 4; ++r) {
      for (size_t c = 1; c <= 80; c += 15) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .Test(xnn_f32_prelu_ukernel__neon_1x16);
      }
    }
  }

  // Rows 1..3, channels 1..80 in steps of 15, input stride 83, iterations 1.
  TEST(F32_PRELU__NEON_1X16, input_stride) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 1; r <= 3; ++r) {
      for (size_t c = 1; c <= 80; c += 15) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .input_stride(83).iterations(1)
            .Test(xnn_f32_prelu_ukernel__neon_1x16);
      }
    }
  }

  // Rows 1..3, channels 1..80 in steps of 15, output stride 83, iterations 1.
  TEST(F32_PRELU__NEON_1X16, output_stride) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 1; r <= 3; ++r) {
      for (size_t c = 1; c <= 80; c += 15) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .output_stride(83).iterations(1)
            .Test(xnn_f32_prelu_ukernel__neon_1x16);
      }
    }
  }

  // Rows 1..3, channels 1..80 in steps of 15, with inplace(true), iterations 1.
  TEST(F32_PRELU__NEON_1X16, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 1; r <= 3; ++r) {
      for (size_t c = 1; c <= 80; c += 15) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .inplace(true).iterations(1)
            .Test(xnn_f32_prelu_ukernel__neon_1x16);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
303 
304 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_f32_prelu_ukernel__neon_2x4. Every test first evaluates
  // TEST_REQUIRES_ARM_NEON and then drives the kernel through
  // PReLUMicrokernelTester with the row/channel configuration listed below.

  // Two rows, exactly 4 channels.
  TEST(F32_PRELU__NEON_2X4, channels_eq_4) {
    TEST_REQUIRES_ARM_NEON;
    PReLUMicrokernelTester().rows(2).channels(4)
        .Test(xnn_f32_prelu_ukernel__neon_2x4);
  }

  // Two rows, channel counts 8 up to (not including) 40 in steps of 4.
  TEST(F32_PRELU__NEON_2X4, channels_div_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t c = 8; c < 40; c += 4) {
      PReLUMicrokernelTester().rows(2).channels(c)
          .Test(xnn_f32_prelu_ukernel__neon_2x4);
    }
  }

  // Two rows, channel counts below 4.
  TEST(F32_PRELU__NEON_2X4, channels_lt_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t c = 1; c < 4; ++c) {
      PReLUMicrokernelTester().rows(2).channels(c)
          .Test(xnn_f32_prelu_ukernel__neon_2x4);
    }
  }

  // Two rows, channel counts strictly between 4 and 8.
  TEST(F32_PRELU__NEON_2X4, channels_gt_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t c = 5; c < 8; ++c) {
      PReLUMicrokernelTester().rows(2).channels(c)
          .Test(xnn_f32_prelu_ukernel__neon_2x4);
    }
  }

  // Row counts below 2 (i.e. a single row), channels 1..20 in steps of 3.
  TEST(F32_PRELU__NEON_2X4, rows_lt_2) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 1; r < 2; ++r) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .Test(xnn_f32_prelu_ukernel__neon_2x4);
      }
    }
  }

  // Row counts 4, 6 and 8, channels 1..20 in steps of 3.
  TEST(F32_PRELU__NEON_2X4, rows_div_2) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 4; r <= 8; r += 2) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .Test(xnn_f32_prelu_ukernel__neon_2x4);
      }
    }
  }

  // Row counts strictly between 2 and 4 (i.e. 3), channels 1..20 in steps of 3.
  TEST(F32_PRELU__NEON_2X4, rows_gt_2) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 3; r < 4; ++r) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .Test(xnn_f32_prelu_ukernel__neon_2x4);
      }
    }
  }

  // Rows 1..6, channels 1..20 in steps of 3, input stride 23, iterations 1.
  TEST(F32_PRELU__NEON_2X4, input_stride) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 1; r <= 6; ++r) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .input_stride(23).iterations(1)
            .Test(xnn_f32_prelu_ukernel__neon_2x4);
      }
    }
  }

  // Rows 1..6, channels 1..20 in steps of 3, output stride 23, iterations 1.
  TEST(F32_PRELU__NEON_2X4, output_stride) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 1; r <= 6; ++r) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .output_stride(23).iterations(1)
            .Test(xnn_f32_prelu_ukernel__neon_2x4);
      }
    }
  }

  // Rows 1..6, channels 1..20 in steps of 3, with inplace(true), iterations 1.
  TEST(F32_PRELU__NEON_2X4, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 1; r <= 6; ++r) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .inplace(true).iterations(1)
            .Test(xnn_f32_prelu_ukernel__neon_2x4);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
422 
423 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_f32_prelu_ukernel__neon_2x8. Every test first evaluates
  // TEST_REQUIRES_ARM_NEON and then drives the kernel through
  // PReLUMicrokernelTester with the row/channel configuration listed below.

  // Two rows, exactly 8 channels.
  TEST(F32_PRELU__NEON_2X8, channels_eq_8) {
    TEST_REQUIRES_ARM_NEON;
    PReLUMicrokernelTester().rows(2).channels(8)
        .Test(xnn_f32_prelu_ukernel__neon_2x8);
  }

  // Two rows, channel counts 16 up to (not including) 80 in steps of 8.
  TEST(F32_PRELU__NEON_2X8, channels_div_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t c = 16; c < 80; c += 8) {
      PReLUMicrokernelTester().rows(2).channels(c)
          .Test(xnn_f32_prelu_ukernel__neon_2x8);
    }
  }

  // Two rows, channel counts below 8.
  TEST(F32_PRELU__NEON_2X8, channels_lt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t c = 1; c < 8; ++c) {
      PReLUMicrokernelTester().rows(2).channels(c)
          .Test(xnn_f32_prelu_ukernel__neon_2x8);
    }
  }

  // Two rows, channel counts strictly between 8 and 16.
  TEST(F32_PRELU__NEON_2X8, channels_gt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t c = 9; c < 16; ++c) {
      PReLUMicrokernelTester().rows(2).channels(c)
          .Test(xnn_f32_prelu_ukernel__neon_2x8);
    }
  }

  // Row counts below 2 (i.e. a single row), channels 1..40 in steps of 7.
  TEST(F32_PRELU__NEON_2X8, rows_lt_2) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 1; r < 2; ++r) {
      for (size_t c = 1; c <= 40; c += 7) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .Test(xnn_f32_prelu_ukernel__neon_2x8);
      }
    }
  }

  // Row counts 4, 6 and 8, channels 1..40 in steps of 7.
  TEST(F32_PRELU__NEON_2X8, rows_div_2) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 4; r <= 8; r += 2) {
      for (size_t c = 1; c <= 40; c += 7) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .Test(xnn_f32_prelu_ukernel__neon_2x8);
      }
    }
  }

  // Row counts strictly between 2 and 4 (i.e. 3), channels 1..40 in steps of 7.
  TEST(F32_PRELU__NEON_2X8, rows_gt_2) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 3; r < 4; ++r) {
      for (size_t c = 1; c <= 40; c += 7) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .Test(xnn_f32_prelu_ukernel__neon_2x8);
      }
    }
  }

  // Rows 1..6, channels 1..40 in steps of 7, input stride 43, iterations 1.
  TEST(F32_PRELU__NEON_2X8, input_stride) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 1; r <= 6; ++r) {
      for (size_t c = 1; c <= 40; c += 7) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .input_stride(43).iterations(1)
            .Test(xnn_f32_prelu_ukernel__neon_2x8);
      }
    }
  }

  // Rows 1..6, channels 1..40 in steps of 7, output stride 43, iterations 1.
  TEST(F32_PRELU__NEON_2X8, output_stride) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 1; r <= 6; ++r) {
      for (size_t c = 1; c <= 40; c += 7) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .output_stride(43).iterations(1)
            .Test(xnn_f32_prelu_ukernel__neon_2x8);
      }
    }
  }

  // Rows 1..6, channels 1..40 in steps of 7, with inplace(true), iterations 1.
  TEST(F32_PRELU__NEON_2X8, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 1; r <= 6; ++r) {
      for (size_t c = 1; c <= 40; c += 7) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .inplace(true).iterations(1)
            .Test(xnn_f32_prelu_ukernel__neon_2x8);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
541 
542 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_f32_prelu_ukernel__neon_2x16. Every test first evaluates
  // TEST_REQUIRES_ARM_NEON and then drives the kernel through
  // PReLUMicrokernelTester with the row/channel configuration listed below.

  // Two rows, exactly 16 channels.
  TEST(F32_PRELU__NEON_2X16, channels_eq_16) {
    TEST_REQUIRES_ARM_NEON;
    PReLUMicrokernelTester().rows(2).channels(16)
        .Test(xnn_f32_prelu_ukernel__neon_2x16);
  }

  // Two rows, channel counts 32 up to (not including) 160 in steps of 16.
  TEST(F32_PRELU__NEON_2X16, channels_div_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t c = 32; c < 160; c += 16) {
      PReLUMicrokernelTester().rows(2).channels(c)
          .Test(xnn_f32_prelu_ukernel__neon_2x16);
    }
  }

  // Two rows, channel counts below 16.
  TEST(F32_PRELU__NEON_2X16, channels_lt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t c = 1; c < 16; ++c) {
      PReLUMicrokernelTester().rows(2).channels(c)
          .Test(xnn_f32_prelu_ukernel__neon_2x16);
    }
  }

  // Two rows, channel counts strictly between 16 and 32.
  TEST(F32_PRELU__NEON_2X16, channels_gt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t c = 17; c < 32; ++c) {
      PReLUMicrokernelTester().rows(2).channels(c)
          .Test(xnn_f32_prelu_ukernel__neon_2x16);
    }
  }

  // Row counts below 2 (i.e. a single row), channels 1..80 in steps of 15.
  TEST(F32_PRELU__NEON_2X16, rows_lt_2) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 1; r < 2; ++r) {
      for (size_t c = 1; c <= 80; c += 15) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .Test(xnn_f32_prelu_ukernel__neon_2x16);
      }
    }
  }

  // Row counts 4, 6 and 8, channels 1..80 in steps of 15.
  TEST(F32_PRELU__NEON_2X16, rows_div_2) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 4; r <= 8; r += 2) {
      for (size_t c = 1; c <= 80; c += 15) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .Test(xnn_f32_prelu_ukernel__neon_2x16);
      }
    }
  }

  // Row counts strictly between 2 and 4 (i.e. 3), channels 1..80 in steps of 15.
  TEST(F32_PRELU__NEON_2X16, rows_gt_2) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 3; r < 4; ++r) {
      for (size_t c = 1; c <= 80; c += 15) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .Test(xnn_f32_prelu_ukernel__neon_2x16);
      }
    }
  }

  // Rows 1..6, channels 1..80 in steps of 15, input stride 83, iterations 1.
  TEST(F32_PRELU__NEON_2X16, input_stride) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 1; r <= 6; ++r) {
      for (size_t c = 1; c <= 80; c += 15) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .input_stride(83).iterations(1)
            .Test(xnn_f32_prelu_ukernel__neon_2x16);
      }
    }
  }

  // Rows 1..6, channels 1..80 in steps of 15, output stride 83, iterations 1.
  TEST(F32_PRELU__NEON_2X16, output_stride) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 1; r <= 6; ++r) {
      for (size_t c = 1; c <= 80; c += 15) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .output_stride(83).iterations(1)
            .Test(xnn_f32_prelu_ukernel__neon_2x16);
      }
    }
  }

  // Rows 1..6, channels 1..80 in steps of 15, with inplace(true), iterations 1.
  TEST(F32_PRELU__NEON_2X16, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 1; r <= 6; ++r) {
      for (size_t c = 1; c <= 80; c += 15) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .inplace(true).iterations(1)
            .Test(xnn_f32_prelu_ukernel__neon_2x16);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
660 
661 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_f32_prelu_ukernel__neon_4x4. Every test first evaluates
  // TEST_REQUIRES_ARM_NEON and then drives the kernel through
  // PReLUMicrokernelTester with the row/channel configuration listed below.

  // Four rows, exactly 4 channels.
  TEST(F32_PRELU__NEON_4X4, channels_eq_4) {
    TEST_REQUIRES_ARM_NEON;
    PReLUMicrokernelTester().rows(4).channels(4)
        .Test(xnn_f32_prelu_ukernel__neon_4x4);
  }

  // Four rows, channel counts 8 up to (not including) 40 in steps of 4.
  TEST(F32_PRELU__NEON_4X4, channels_div_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t c = 8; c < 40; c += 4) {
      PReLUMicrokernelTester().rows(4).channels(c)
          .Test(xnn_f32_prelu_ukernel__neon_4x4);
    }
  }

  // Four rows, channel counts below 4.
  TEST(F32_PRELU__NEON_4X4, channels_lt_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t c = 1; c < 4; ++c) {
      PReLUMicrokernelTester().rows(4).channels(c)
          .Test(xnn_f32_prelu_ukernel__neon_4x4);
    }
  }

  // Four rows, channel counts strictly between 4 and 8.
  TEST(F32_PRELU__NEON_4X4, channels_gt_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t c = 5; c < 8; ++c) {
      PReLUMicrokernelTester().rows(4).channels(c)
          .Test(xnn_f32_prelu_ukernel__neon_4x4);
    }
  }

  // Row counts below 4 (1..3), channels 1..20 in steps of 3.
  TEST(F32_PRELU__NEON_4X4, rows_lt_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 1; r < 4; ++r) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .Test(xnn_f32_prelu_ukernel__neon_4x4);
      }
    }
  }

  // Row counts 8, 12 and 16, channels 1..20 in steps of 3.
  TEST(F32_PRELU__NEON_4X4, rows_div_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 8; r <= 16; r += 4) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .Test(xnn_f32_prelu_ukernel__neon_4x4);
      }
    }
  }

  // Row counts strictly between 4 and 8 (5..7), channels 1..20 in steps of 3.
  TEST(F32_PRELU__NEON_4X4, rows_gt_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 5; r < 8; ++r) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .Test(xnn_f32_prelu_ukernel__neon_4x4);
      }
    }
  }

  // Rows 1..12 in steps of 3, channels 1..20 in steps of 3, input stride 23,
  // iterations 1.
  TEST(F32_PRELU__NEON_4X4, input_stride) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 1; r <= 12; r += 3) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .input_stride(23).iterations(1)
            .Test(xnn_f32_prelu_ukernel__neon_4x4);
      }
    }
  }

  // Rows 1..12 in steps of 3, channels 1..20 in steps of 3, output stride 23,
  // iterations 1.
  TEST(F32_PRELU__NEON_4X4, output_stride) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 1; r <= 12; r += 3) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .output_stride(23).iterations(1)
            .Test(xnn_f32_prelu_ukernel__neon_4x4);
      }
    }
  }

  // Rows 1..12 in steps of 3, channels 1..20 in steps of 3, with inplace(true),
  // iterations 1.
  TEST(F32_PRELU__NEON_4X4, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 1; r <= 12; r += 3) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .inplace(true).iterations(1)
            .Test(xnn_f32_prelu_ukernel__neon_4x4);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
779 
780 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_f32_prelu_ukernel__neon_4x8. Every test first evaluates
  // TEST_REQUIRES_ARM_NEON and then drives the kernel through
  // PReLUMicrokernelTester with the row/channel configuration listed below.

  // Four rows, exactly 8 channels.
  TEST(F32_PRELU__NEON_4X8, channels_eq_8) {
    TEST_REQUIRES_ARM_NEON;
    PReLUMicrokernelTester().rows(4).channels(8)
        .Test(xnn_f32_prelu_ukernel__neon_4x8);
  }

  // Four rows, channel counts 16 up to (not including) 80 in steps of 8.
  TEST(F32_PRELU__NEON_4X8, channels_div_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t c = 16; c < 80; c += 8) {
      PReLUMicrokernelTester().rows(4).channels(c)
          .Test(xnn_f32_prelu_ukernel__neon_4x8);
    }
  }

  // Four rows, channel counts below 8.
  TEST(F32_PRELU__NEON_4X8, channels_lt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t c = 1; c < 8; ++c) {
      PReLUMicrokernelTester().rows(4).channels(c)
          .Test(xnn_f32_prelu_ukernel__neon_4x8);
    }
  }

  // Four rows, channel counts strictly between 8 and 16.
  TEST(F32_PRELU__NEON_4X8, channels_gt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t c = 9; c < 16; ++c) {
      PReLUMicrokernelTester().rows(4).channels(c)
          .Test(xnn_f32_prelu_ukernel__neon_4x8);
    }
  }

  // Row counts below 4 (1..3), channels 1..40 in steps of 7.
  TEST(F32_PRELU__NEON_4X8, rows_lt_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 1; r < 4; ++r) {
      for (size_t c = 1; c <= 40; c += 7) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .Test(xnn_f32_prelu_ukernel__neon_4x8);
      }
    }
  }

  // Row counts 8, 12 and 16, channels 1..40 in steps of 7.
  TEST(F32_PRELU__NEON_4X8, rows_div_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 8; r <= 16; r += 4) {
      for (size_t c = 1; c <= 40; c += 7) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .Test(xnn_f32_prelu_ukernel__neon_4x8);
      }
    }
  }

  // Row counts strictly between 4 and 8 (5..7), channels 1..40 in steps of 7.
  TEST(F32_PRELU__NEON_4X8, rows_gt_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 5; r < 8; ++r) {
      for (size_t c = 1; c <= 40; c += 7) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .Test(xnn_f32_prelu_ukernel__neon_4x8);
      }
    }
  }

  // Rows 1..12 in steps of 3, channels 1..40 in steps of 7, input stride 43,
  // iterations 1.
  TEST(F32_PRELU__NEON_4X8, input_stride) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 1; r <= 12; r += 3) {
      for (size_t c = 1; c <= 40; c += 7) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .input_stride(43).iterations(1)
            .Test(xnn_f32_prelu_ukernel__neon_4x8);
      }
    }
  }

  // Rows 1..12 in steps of 3, channels 1..40 in steps of 7, output stride 43,
  // iterations 1.
  TEST(F32_PRELU__NEON_4X8, output_stride) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 1; r <= 12; r += 3) {
      for (size_t c = 1; c <= 40; c += 7) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .output_stride(43).iterations(1)
            .Test(xnn_f32_prelu_ukernel__neon_4x8);
      }
    }
  }

  // Rows 1..12 in steps of 3, channels 1..40 in steps of 7, with inplace(true),
  // iterations 1.
  TEST(F32_PRELU__NEON_4X8, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t r = 1; r <= 12; r += 3) {
      for (size_t c = 1; c <= 40; c += 7) {
        PReLUMicrokernelTester().rows(r).channels(c)
            .inplace(true).iterations(1)
            .Test(xnn_f32_prelu_ukernel__neon_4x8);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
898 
899 
900 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs xnn_f32_prelu_ukernel__neon_4x16 with four rows and exactly 16 channels.
TEST(F32_PRELU__NEON_4X16, channels_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  PReLUMicrokernelTester().rows(4).channels(16)
      .Test(xnn_f32_prelu_ukernel__neon_4x16);
}
908 
// Runs xnn_f32_prelu_ukernel__neon_4x16 with four rows and channel counts
// 32 up to (not including) 160 in steps of 16.
TEST(F32_PRELU__NEON_4X16, channels_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 32; c < 160; c += 16) {
    PReLUMicrokernelTester().rows(4).channels(c)
        .Test(xnn_f32_prelu_ukernel__neon_4x16);
  }
}
918 
// Runs xnn_f32_prelu_ukernel__neon_4x16 with four rows and every channel count
// below 16.
TEST(F32_PRELU__NEON_4X16, channels_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c < 16; ++c) {
    PReLUMicrokernelTester().rows(4).channels(c)
        .Test(xnn_f32_prelu_ukernel__neon_4x16);
  }
}
928 
// Runs xnn_f32_prelu_ukernel__neon_4x16 with four rows and every channel count
// strictly between 16 and 32.
TEST(F32_PRELU__NEON_4X16, channels_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 17; c < 32; ++c) {
    PReLUMicrokernelTester().rows(4).channels(c)
        .Test(xnn_f32_prelu_ukernel__neon_4x16);
  }
}
938 
// Rows 1 through 3 crossed with channels 1, 16, ..., 76.
TEST(F32_PRELU__NEON_4X16, rows_lt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t r = 1; r < 4; ++r) {
    for (size_t c = 1; c <= 80; c += 15) {
      PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__neon_4x16);
    }
  }
}
950 
// Rows 8, 12, 16 (multiples of 4) crossed with channels 1, 16, ..., 76.
TEST(F32_PRELU__NEON_4X16, rows_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t r = 8; r <= 16; r += 4) {
    for (size_t c = 1; c <= 80; c += 15) {
      PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__neon_4x16);
    }
  }
}
962 
// Rows 5 through 7 crossed with channels 1, 16, ..., 76.
TEST(F32_PRELU__NEON_4X16, rows_gt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t r = 5; r < 8; ++r) {
    for (size_t c = 1; c <= 80; c += 15) {
      PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__neon_4x16);
    }
  }
}
974 
// input_stride(83): rows 1, 4, 7, 10 crossed with channels 1, 16, ..., 76; iterations set to 1.
TEST(F32_PRELU__NEON_4X16, input_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t r = 1; r <= 12; r += 3) {
    for (size_t c = 1; c <= 80; c += 15) {
      PReLUMicrokernelTester().rows(r).channels(c).input_stride(83).iterations(1)
        .Test(xnn_f32_prelu_ukernel__neon_4x16);
    }
  }
}
988 
// output_stride(83): rows 1, 4, 7, 10 crossed with channels 1, 16, ..., 76; iterations set to 1.
TEST(F32_PRELU__NEON_4X16, output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t r = 1; r <= 12; r += 3) {
    for (size_t c = 1; c <= 80; c += 15) {
      PReLUMicrokernelTester().rows(r).channels(c).output_stride(83).iterations(1)
        .Test(xnn_f32_prelu_ukernel__neon_4x16);
    }
  }
}
1002 
// In-place mode: rows 1, 4, 7, 10 crossed with channels 1, 16, ..., 76; iterations set to 1.
TEST(F32_PRELU__NEON_4X16, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t r = 1; r <= 12; r += 3) {
    for (size_t c = 1; c <= 80; c += 15) {
      PReLUMicrokernelTester().rows(r).channels(c).inplace(true).iterations(1)
        .Test(xnn_f32_prelu_ukernel__neon_4x16);
    }
  }
}
1016 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1017 
1018 
1019 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exercises the SSE 2x4 kernel with rows=2 and channels=4.
TEST(F32_PRELU__SSE_2X4, channels_eq_4) {
  TEST_REQUIRES_X86_SSE;
  PReLUMicrokernelTester().rows(2).channels(4).Test(xnn_f32_prelu_ukernel__sse_2x4);
}
1027 
// Channel counts 8, 12, ..., 36 (multiples of 4) with rows fixed at 2.
TEST(F32_PRELU__SSE_2X4, channels_div_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t c = 8; c < 40; c += 4) {
    PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__sse_2x4);
  }
}
1037 
// Channel counts 1 through 3 with rows fixed at 2.
TEST(F32_PRELU__SSE_2X4, channels_lt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t c = 1; c < 4; ++c) {
    PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__sse_2x4);
  }
}
1047 
// Channel counts 5 through 7 with rows fixed at 2.
TEST(F32_PRELU__SSE_2X4, channels_gt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t c = 5; c < 8; ++c) {
    PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__sse_2x4);
  }
}
1057 
// Rows below 2 (the loop yields only rows=1) crossed with channels 1, 4, ..., 19.
TEST(F32_PRELU__SSE_2X4, rows_lt_2) {
  TEST_REQUIRES_X86_SSE;
  for (size_t r = 1; r < 2; ++r) {
    for (size_t c = 1; c <= 20; c += 3) {
      PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__sse_2x4);
    }
  }
}
1069 
// Rows 4, 6, 8 (multiples of 2) crossed with channels 1, 4, ..., 19.
TEST(F32_PRELU__SSE_2X4, rows_div_2) {
  TEST_REQUIRES_X86_SSE;
  for (size_t r = 4; r <= 8; r += 2) {
    for (size_t c = 1; c <= 20; c += 3) {
      PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__sse_2x4);
    }
  }
}
1081 
// Rows in [3, 4) (the loop yields only rows=3) crossed with channels 1, 4, ..., 19.
TEST(F32_PRELU__SSE_2X4, rows_gt_2) {
  TEST_REQUIRES_X86_SSE;
  for (size_t r = 3; r < 4; ++r) {
    for (size_t c = 1; c <= 20; c += 3) {
      PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__sse_2x4);
    }
  }
}
1093 
// input_stride(23): rows 1 through 6 crossed with channels 1, 4, ..., 19; iterations set to 1.
TEST(F32_PRELU__SSE_2X4, input_stride) {
  TEST_REQUIRES_X86_SSE;
  for (size_t r = 1; r <= 6; ++r) {
    for (size_t c = 1; c <= 20; c += 3) {
      PReLUMicrokernelTester().rows(r).channels(c).input_stride(23).iterations(1)
        .Test(xnn_f32_prelu_ukernel__sse_2x4);
    }
  }
}
1107 
// output_stride(23): rows 1 through 6 crossed with channels 1, 4, ..., 19; iterations set to 1.
TEST(F32_PRELU__SSE_2X4, output_stride) {
  TEST_REQUIRES_X86_SSE;
  for (size_t r = 1; r <= 6; ++r) {
    for (size_t c = 1; c <= 20; c += 3) {
      PReLUMicrokernelTester().rows(r).channels(c).output_stride(23).iterations(1)
        .Test(xnn_f32_prelu_ukernel__sse_2x4);
    }
  }
}
1121 
// In-place mode: rows 1 through 6 crossed with channels 1, 4, ..., 19; iterations set to 1.
TEST(F32_PRELU__SSE_2X4, inplace) {
  TEST_REQUIRES_X86_SSE;
  for (size_t r = 1; r <= 6; ++r) {
    for (size_t c = 1; c <= 20; c += 3) {
      PReLUMicrokernelTester().rows(r).channels(c).inplace(true).iterations(1)
        .Test(xnn_f32_prelu_ukernel__sse_2x4);
    }
  }
}
1135 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1136 
1137 
1138 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exercises the SSE 2x8 kernel with rows=2 and channels=8.
TEST(F32_PRELU__SSE_2X8, channels_eq_8) {
  TEST_REQUIRES_X86_SSE;
  PReLUMicrokernelTester().rows(2).channels(8).Test(xnn_f32_prelu_ukernel__sse_2x8);
}
1146 
// Channel counts 16, 24, ..., 72 (multiples of 8) with rows fixed at 2.
TEST(F32_PRELU__SSE_2X8, channels_div_8) {
  TEST_REQUIRES_X86_SSE;
  for (size_t c = 16; c < 80; c += 8) {
    PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__sse_2x8);
  }
}
1156 
// Channel counts 1 through 7 with rows fixed at 2.
TEST(F32_PRELU__SSE_2X8, channels_lt_8) {
  TEST_REQUIRES_X86_SSE;
  for (size_t c = 1; c < 8; ++c) {
    PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__sse_2x8);
  }
}
1166 
// Channel counts 9 through 15 with rows fixed at 2.
TEST(F32_PRELU__SSE_2X8, channels_gt_8) {
  TEST_REQUIRES_X86_SSE;
  for (size_t c = 9; c < 16; ++c) {
    PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__sse_2x8);
  }
}
1176 
// Rows below 2 (the loop yields only rows=1) crossed with channels 1, 8, ..., 36.
TEST(F32_PRELU__SSE_2X8, rows_lt_2) {
  TEST_REQUIRES_X86_SSE;
  for (size_t r = 1; r < 2; ++r) {
    for (size_t c = 1; c <= 40; c += 7) {
      PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__sse_2x8);
    }
  }
}
1188 
// Rows 4, 6, 8 (multiples of 2) crossed with channels 1, 8, ..., 36.
TEST(F32_PRELU__SSE_2X8, rows_div_2) {
  TEST_REQUIRES_X86_SSE;
  for (size_t r = 4; r <= 8; r += 2) {
    for (size_t c = 1; c <= 40; c += 7) {
      PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__sse_2x8);
    }
  }
}
1200 
// Rows in [3, 4) (the loop yields only rows=3) crossed with channels 1, 8, ..., 36.
TEST(F32_PRELU__SSE_2X8, rows_gt_2) {
  TEST_REQUIRES_X86_SSE;
  for (size_t r = 3; r < 4; ++r) {
    for (size_t c = 1; c <= 40; c += 7) {
      PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__sse_2x8);
    }
  }
}
1212 
// input_stride(43): rows 1 through 6 crossed with channels 1, 8, ..., 36; iterations set to 1.
TEST(F32_PRELU__SSE_2X8, input_stride) {
  TEST_REQUIRES_X86_SSE;
  for (size_t r = 1; r <= 6; ++r) {
    for (size_t c = 1; c <= 40; c += 7) {
      PReLUMicrokernelTester().rows(r).channels(c).input_stride(43).iterations(1)
        .Test(xnn_f32_prelu_ukernel__sse_2x8);
    }
  }
}
1226 
// output_stride(43): rows 1 through 6 crossed with channels 1, 8, ..., 36; iterations set to 1.
TEST(F32_PRELU__SSE_2X8, output_stride) {
  TEST_REQUIRES_X86_SSE;
  for (size_t r = 1; r <= 6; ++r) {
    for (size_t c = 1; c <= 40; c += 7) {
      PReLUMicrokernelTester().rows(r).channels(c).output_stride(43).iterations(1)
        .Test(xnn_f32_prelu_ukernel__sse_2x8);
    }
  }
}
1240 
// In-place mode: rows 1 through 6 crossed with channels 1, 8, ..., 36; iterations set to 1.
TEST(F32_PRELU__SSE_2X8, inplace) {
  TEST_REQUIRES_X86_SSE;
  for (size_t r = 1; r <= 6; ++r) {
    for (size_t c = 1; c <= 40; c += 7) {
      PReLUMicrokernelTester().rows(r).channels(c).inplace(true).iterations(1)
        .Test(xnn_f32_prelu_ukernel__sse_2x8);
    }
  }
}
1254 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1255 
1256 
1257 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exercises the SSE2 2x4 kernel with rows=2 and channels=4.
TEST(F32_PRELU__SSE2_2X4, channels_eq_4) {
  TEST_REQUIRES_X86_SSE2;
  PReLUMicrokernelTester().rows(2).channels(4).Test(xnn_f32_prelu_ukernel__sse2_2x4);
}
1265 
// Channel counts 8, 12, ..., 36 (multiples of 4) with rows fixed at 2.
TEST(F32_PRELU__SSE2_2X4, channels_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 8; c < 40; c += 4) {
    PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__sse2_2x4);
  }
}
1275 
// Channel counts 1 through 3 with rows fixed at 2.
TEST(F32_PRELU__SSE2_2X4, channels_lt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c < 4; ++c) {
    PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__sse2_2x4);
  }
}
1285 
// Channel counts 5 through 7 with rows fixed at 2.
TEST(F32_PRELU__SSE2_2X4, channels_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 5; c < 8; ++c) {
    PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__sse2_2x4);
  }
}
1295 
// Rows below 2 (the loop yields only rows=1) crossed with channels 1, 4, ..., 19.
TEST(F32_PRELU__SSE2_2X4, rows_lt_2) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t r = 1; r < 2; ++r) {
    for (size_t c = 1; c <= 20; c += 3) {
      PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__sse2_2x4);
    }
  }
}
1307 
// Rows 4, 6, 8 (multiples of 2) crossed with channels 1, 4, ..., 19.
TEST(F32_PRELU__SSE2_2X4, rows_div_2) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t r = 4; r <= 8; r += 2) {
    for (size_t c = 1; c <= 20; c += 3) {
      PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__sse2_2x4);
    }
  }
}
1319 
// Rows in [3, 4) (the loop yields only rows=3) crossed with channels 1, 4, ..., 19.
TEST(F32_PRELU__SSE2_2X4, rows_gt_2) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t r = 3; r < 4; ++r) {
    for (size_t c = 1; c <= 20; c += 3) {
      PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__sse2_2x4);
    }
  }
}
1331 
// input_stride(23): rows 1 through 6 crossed with channels 1, 4, ..., 19; iterations set to 1.
TEST(F32_PRELU__SSE2_2X4, input_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t r = 1; r <= 6; ++r) {
    for (size_t c = 1; c <= 20; c += 3) {
      PReLUMicrokernelTester().rows(r).channels(c).input_stride(23).iterations(1)
        .Test(xnn_f32_prelu_ukernel__sse2_2x4);
    }
  }
}
1345 
// output_stride(23): rows 1 through 6 crossed with channels 1, 4, ..., 19; iterations set to 1.
TEST(F32_PRELU__SSE2_2X4, output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t r = 1; r <= 6; ++r) {
    for (size_t c = 1; c <= 20; c += 3) {
      PReLUMicrokernelTester().rows(r).channels(c).output_stride(23).iterations(1)
        .Test(xnn_f32_prelu_ukernel__sse2_2x4);
    }
  }
}
1359 
// In-place mode: rows 1 through 6 crossed with channels 1, 4, ..., 19; iterations set to 1.
TEST(F32_PRELU__SSE2_2X4, inplace) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t r = 1; r <= 6; ++r) {
    for (size_t c = 1; c <= 20; c += 3) {
      PReLUMicrokernelTester().rows(r).channels(c).inplace(true).iterations(1)
        .Test(xnn_f32_prelu_ukernel__sse2_2x4);
    }
  }
}
1373 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1374 
1375 
1376 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exercises the SSE2 2x8 kernel with rows=2 and channels=8.
TEST(F32_PRELU__SSE2_2X8, channels_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  PReLUMicrokernelTester().rows(2).channels(8).Test(xnn_f32_prelu_ukernel__sse2_2x8);
}
1384 
// Channel counts 16, 24, ..., 72 (multiples of 8) with rows fixed at 2.
TEST(F32_PRELU__SSE2_2X8, channels_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 16; c < 80; c += 8) {
    PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__sse2_2x8);
  }
}
1394 
// Channel counts 1 through 7 with rows fixed at 2.
TEST(F32_PRELU__SSE2_2X8, channels_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c < 8; ++c) {
    PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__sse2_2x8);
  }
}
1404 
// Channel counts 9 through 15 with rows fixed at 2.
TEST(F32_PRELU__SSE2_2X8, channels_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 9; c < 16; ++c) {
    PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__sse2_2x8);
  }
}
1414 
// Rows below 2 (the loop yields only rows=1) crossed with channels 1, 8, ..., 36.
TEST(F32_PRELU__SSE2_2X8, rows_lt_2) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t r = 1; r < 2; ++r) {
    for (size_t c = 1; c <= 40; c += 7) {
      PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__sse2_2x8);
    }
  }
}
1426 
// Rows 4, 6, 8 (multiples of 2) crossed with channels 1, 8, ..., 36.
TEST(F32_PRELU__SSE2_2X8, rows_div_2) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t r = 4; r <= 8; r += 2) {
    for (size_t c = 1; c <= 40; c += 7) {
      PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__sse2_2x8);
    }
  }
}
1438 
// Rows in [3, 4) (the loop yields only rows=3) crossed with channels 1, 8, ..., 36.
TEST(F32_PRELU__SSE2_2X8, rows_gt_2) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t r = 3; r < 4; ++r) {
    for (size_t c = 1; c <= 40; c += 7) {
      PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__sse2_2x8);
    }
  }
}
1450 
// input_stride(43): rows 1 through 6 crossed with channels 1, 8, ..., 36; iterations set to 1.
TEST(F32_PRELU__SSE2_2X8, input_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t r = 1; r <= 6; ++r) {
    for (size_t c = 1; c <= 40; c += 7) {
      PReLUMicrokernelTester().rows(r).channels(c).input_stride(43).iterations(1)
        .Test(xnn_f32_prelu_ukernel__sse2_2x8);
    }
  }
}
1464 
// output_stride(43): rows 1 through 6 crossed with channels 1, 8, ..., 36; iterations set to 1.
TEST(F32_PRELU__SSE2_2X8, output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t r = 1; r <= 6; ++r) {
    for (size_t c = 1; c <= 40; c += 7) {
      PReLUMicrokernelTester().rows(r).channels(c).output_stride(43).iterations(1)
        .Test(xnn_f32_prelu_ukernel__sse2_2x8);
    }
  }
}
1478 
// In-place mode: rows 1 through 6 crossed with channels 1, 8, ..., 36; iterations set to 1.
TEST(F32_PRELU__SSE2_2X8, inplace) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t r = 1; r <= 6; ++r) {
    for (size_t c = 1; c <= 40; c += 7) {
      PReLUMicrokernelTester().rows(r).channels(c).inplace(true).iterations(1)
        .Test(xnn_f32_prelu_ukernel__sse2_2x8);
    }
  }
}
1492 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1493 
1494 
1495 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exercises the SSE4.1 2x4 kernel with rows=2 and channels=4.
TEST(F32_PRELU__SSE41_2X4, channels_eq_4) {
  TEST_REQUIRES_X86_SSE41;
  PReLUMicrokernelTester().rows(2).channels(4).Test(xnn_f32_prelu_ukernel__sse41_2x4);
}
1503 
// Channel counts 8, 12, ..., 36 (multiples of 4) with rows fixed at 2.
TEST(F32_PRELU__SSE41_2X4, channels_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 8; c < 40; c += 4) {
    PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__sse41_2x4);
  }
}
1513 
// Channel counts 1 through 3 with rows fixed at 2.
TEST(F32_PRELU__SSE41_2X4, channels_lt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c < 4; ++c) {
    PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__sse41_2x4);
  }
}
1523 
// Channel counts 5 through 7 with rows fixed at 2.
TEST(F32_PRELU__SSE41_2X4, channels_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 5; c < 8; ++c) {
    PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__sse41_2x4);
  }
}
1533 
// Rows below 2 (the loop yields only rows=1) crossed with channels 1, 4, ..., 19.
TEST(F32_PRELU__SSE41_2X4, rows_lt_2) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t r = 1; r < 2; ++r) {
    for (size_t c = 1; c <= 20; c += 3) {
      PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__sse41_2x4);
    }
  }
}
1545 
// Rows 4, 6, 8 (multiples of 2) crossed with channels 1, 4, ..., 19.
TEST(F32_PRELU__SSE41_2X4, rows_div_2) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t r = 4; r <= 8; r += 2) {
    for (size_t c = 1; c <= 20; c += 3) {
      PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__sse41_2x4);
    }
  }
}
1557 
// Rows in [3, 4) (the loop yields only rows=3) crossed with channels 1, 4, ..., 19.
TEST(F32_PRELU__SSE41_2X4, rows_gt_2) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t r = 3; r < 4; ++r) {
    for (size_t c = 1; c <= 20; c += 3) {
      PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__sse41_2x4);
    }
  }
}
1569 
// input_stride(23): rows 1 through 6 crossed with channels 1, 4, ..., 19; iterations set to 1.
TEST(F32_PRELU__SSE41_2X4, input_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t r = 1; r <= 6; ++r) {
    for (size_t c = 1; c <= 20; c += 3) {
      PReLUMicrokernelTester().rows(r).channels(c).input_stride(23).iterations(1)
        .Test(xnn_f32_prelu_ukernel__sse41_2x4);
    }
  }
}
1583 
// output_stride(23): rows 1 through 6 crossed with channels 1, 4, ..., 19; iterations set to 1.
TEST(F32_PRELU__SSE41_2X4, output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t r = 1; r <= 6; ++r) {
    for (size_t c = 1; c <= 20; c += 3) {
      PReLUMicrokernelTester().rows(r).channels(c).output_stride(23).iterations(1)
        .Test(xnn_f32_prelu_ukernel__sse41_2x4);
    }
  }
}
1597 
// In-place mode: rows 1 through 6 crossed with channels 1, 4, ..., 19; iterations set to 1.
TEST(F32_PRELU__SSE41_2X4, inplace) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t r = 1; r <= 6; ++r) {
    for (size_t c = 1; c <= 20; c += 3) {
      PReLUMicrokernelTester().rows(r).channels(c).inplace(true).iterations(1)
        .Test(xnn_f32_prelu_ukernel__sse41_2x4);
    }
  }
}
1611 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1612 
1613 
1614 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exercises the SSE4.1 2x8 kernel with rows=2 and channels=8.
TEST(F32_PRELU__SSE41_2X8, channels_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  PReLUMicrokernelTester().rows(2).channels(8).Test(xnn_f32_prelu_ukernel__sse41_2x8);
}
1622 
// Channel counts 16, 24, ..., 72 (multiples of 8) with rows fixed at 2.
TEST(F32_PRELU__SSE41_2X8, channels_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 16; c < 80; c += 8) {
    PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__sse41_2x8);
  }
}
1632 
// Channel counts 1 through 7 with rows fixed at 2.
TEST(F32_PRELU__SSE41_2X8, channels_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c < 8; ++c) {
    PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__sse41_2x8);
  }
}
1642 
// Channel counts 9 through 15 with rows fixed at 2.
TEST(F32_PRELU__SSE41_2X8, channels_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 9; c < 16; ++c) {
    PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__sse41_2x8);
  }
}
1652 
// Rows below 2 (the loop yields only rows=1) crossed with channels 1, 8, ..., 36.
TEST(F32_PRELU__SSE41_2X8, rows_lt_2) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t r = 1; r < 2; ++r) {
    for (size_t c = 1; c <= 40; c += 7) {
      PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__sse41_2x8);
    }
  }
}
1664 
// Rows 4, 6, 8 (multiples of 2) crossed with channels 1, 8, ..., 36.
TEST(F32_PRELU__SSE41_2X8, rows_div_2) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t r = 4; r <= 8; r += 2) {
    for (size_t c = 1; c <= 40; c += 7) {
      PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__sse41_2x8);
    }
  }
}
1676 
// Rows in [3, 4) (the loop yields only rows=3) crossed with channels 1, 8, ..., 36.
TEST(F32_PRELU__SSE41_2X8, rows_gt_2) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t r = 3; r < 4; ++r) {
    for (size_t c = 1; c <= 40; c += 7) {
      PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__sse41_2x8);
    }
  }
}
1688 
// input_stride(43): rows 1 through 6 crossed with channels 1, 8, ..., 36; iterations set to 1.
TEST(F32_PRELU__SSE41_2X8, input_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t r = 1; r <= 6; ++r) {
    for (size_t c = 1; c <= 40; c += 7) {
      PReLUMicrokernelTester().rows(r).channels(c).input_stride(43).iterations(1)
        .Test(xnn_f32_prelu_ukernel__sse41_2x8);
    }
  }
}
1702 
// output_stride(43): rows 1 through 6 crossed with channels 1, 8, ..., 36; iterations set to 1.
TEST(F32_PRELU__SSE41_2X8, output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t r = 1; r <= 6; ++r) {
    for (size_t c = 1; c <= 40; c += 7) {
      PReLUMicrokernelTester().rows(r).channels(c).output_stride(43).iterations(1)
        .Test(xnn_f32_prelu_ukernel__sse41_2x8);
    }
  }
}
1716 
// In-place mode: rows 1 through 6 crossed with channels 1, 8, ..., 36; iterations set to 1.
TEST(F32_PRELU__SSE41_2X8, inplace) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t r = 1; r <= 6; ++r) {
    for (size_t c = 1; c <= 40; c += 7) {
      PReLUMicrokernelTester().rows(r).channels(c).inplace(true).iterations(1)
        .Test(xnn_f32_prelu_ukernel__sse41_2x8);
    }
  }
}
1730 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1731 
1732 
1733 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exercises the AVX 2x8 kernel with rows=2 and channels=8.
TEST(F32_PRELU__AVX_2X8, channels_eq_8) {
  TEST_REQUIRES_X86_AVX;
  PReLUMicrokernelTester().rows(2).channels(8).Test(xnn_f32_prelu_ukernel__avx_2x8);
}
1741 
// Channel counts 16, 24, ..., 72 (multiples of 8) with rows fixed at 2.
TEST(F32_PRELU__AVX_2X8, channels_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 16; c < 80; c += 8) {
    PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__avx_2x8);
  }
}
1751 
// Channel counts 1 through 7 with rows fixed at 2.
TEST(F32_PRELU__AVX_2X8, channels_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c < 8; ++c) {
    PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__avx_2x8);
  }
}
1761 
// Channel counts 9 through 15 with rows fixed at 2.
TEST(F32_PRELU__AVX_2X8, channels_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 9; c < 16; ++c) {
    PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__avx_2x8);
  }
}
1771 
// Rows below 2 (the loop yields only rows=1) crossed with channels 1, 8, ..., 36.
TEST(F32_PRELU__AVX_2X8, rows_lt_2) {
  TEST_REQUIRES_X86_AVX;
  for (size_t r = 1; r < 2; ++r) {
    for (size_t c = 1; c <= 40; c += 7) {
      PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__avx_2x8);
    }
  }
}
1783 
// Rows 4, 6, 8 (multiples of 2) crossed with channels 1, 8, ..., 36.
TEST(F32_PRELU__AVX_2X8, rows_div_2) {
  TEST_REQUIRES_X86_AVX;
  for (size_t r = 4; r <= 8; r += 2) {
    for (size_t c = 1; c <= 40; c += 7) {
      PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__avx_2x8);
    }
  }
}
1795 
// Rows in [3, 4) (the loop yields only rows=3) crossed with channels 1, 8, ..., 36.
TEST(F32_PRELU__AVX_2X8, rows_gt_2) {
  TEST_REQUIRES_X86_AVX;
  for (size_t r = 3; r < 4; ++r) {
    for (size_t c = 1; c <= 40; c += 7) {
      PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__avx_2x8);
    }
  }
}
1807 
// input_stride(43): rows 1 through 6 crossed with channels 1, 8, ..., 36; iterations set to 1.
TEST(F32_PRELU__AVX_2X8, input_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t r = 1; r <= 6; ++r) {
    for (size_t c = 1; c <= 40; c += 7) {
      PReLUMicrokernelTester().rows(r).channels(c).input_stride(43).iterations(1)
        .Test(xnn_f32_prelu_ukernel__avx_2x8);
    }
  }
}
1821 
// output_stride(43): rows 1 through 6 crossed with channels 1, 8, ..., 36; iterations set to 1.
TEST(F32_PRELU__AVX_2X8, output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t r = 1; r <= 6; ++r) {
    for (size_t c = 1; c <= 40; c += 7) {
      PReLUMicrokernelTester().rows(r).channels(c).output_stride(43).iterations(1)
        .Test(xnn_f32_prelu_ukernel__avx_2x8);
    }
  }
}
1835 
// Rows 1..6 and channel counts 1, 8, ..., 36 with inplace(true) and iterations(1).
TEST(F32_PRELU__AVX_2X8, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t row_count = 1; row_count < 7; ++row_count) {
    for (size_t channel_count = 1; channel_count < 41; channel_count += 7) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__avx_2x8);
    }
  }
}
1849 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1850 
1851 
1852 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single case: 2 rows x 16 channels, the sizes that appear in the kernel name.
TEST(F32_PRELU__AVX_2X16, channels_eq_16) {
  TEST_REQUIRES_X86_AVX;
  PReLUMicrokernelTester().rows(2).channels(16).Test(xnn_f32_prelu_ukernel__avx_2x16);
}
1860 
// Channel counts 32, 48, ..., 144 (2 to 9 times 16) with 2 rows.
TEST(F32_PRELU__AVX_2X16, channels_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    const size_t channel_count = multiple * 16;
    PReLUMicrokernelTester().rows(2).channels(channel_count)
        .Test(xnn_f32_prelu_ukernel__avx_2x16);
  }
}
1870 
// Channel counts 1 through 15 with 2 rows.
TEST(F32_PRELU__AVX_2X16, channels_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channel_count = 1; channel_count <= 15; ++channel_count) {
    PReLUMicrokernelTester().rows(2).channels(channel_count)
        .Test(xnn_f32_prelu_ukernel__avx_2x16);
  }
}
1880 
// Channel counts 17 through 31 with 2 rows.
TEST(F32_PRELU__AVX_2X16, channels_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channel_count = 17; channel_count <= 31; ++channel_count) {
    PReLUMicrokernelTester().rows(2).channels(channel_count)
        .Test(xnn_f32_prelu_ukernel__avx_2x16);
  }
}
1890 
// One row only, with channel counts 1, 16, ..., 76.
TEST(F32_PRELU__AVX_2X16, rows_lt_2) {
  TEST_REQUIRES_X86_AVX;
  for (size_t row_count = 1; row_count <= 1; ++row_count) {
    for (size_t channel_count = 1; channel_count < 81; channel_count += 15) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count)
          .Test(xnn_f32_prelu_ukernel__avx_2x16);
    }
  }
}
1902 
// Even row counts 4, 6, 8, each with channel counts 1, 16, ..., 76.
TEST(F32_PRELU__AVX_2X16, rows_div_2) {
  TEST_REQUIRES_X86_AVX;
  for (size_t row_count = 4; row_count < 10; row_count += 2) {
    for (size_t channel_count = 1; channel_count < 81; channel_count += 15) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count)
          .Test(xnn_f32_prelu_ukernel__avx_2x16);
    }
  }
}
1914 
// Three rows, with channel counts 1, 16, ..., 76.
TEST(F32_PRELU__AVX_2X16, rows_gt_2) {
  TEST_REQUIRES_X86_AVX;
  for (size_t row_count = 3; row_count <= 3; ++row_count) {
    for (size_t channel_count = 1; channel_count < 81; channel_count += 15) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count)
          .Test(xnn_f32_prelu_ukernel__avx_2x16);
    }
  }
}
1926 
// Rows 1..6 and channel counts 1, 16, ..., 76 with input_stride(83) and iterations(1).
TEST(F32_PRELU__AVX_2X16, input_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t row_count = 1; row_count < 7; ++row_count) {
    for (size_t channel_count = 1; channel_count < 81; channel_count += 15) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).input_stride(83).iterations(1)
          .Test(xnn_f32_prelu_ukernel__avx_2x16);
    }
  }
}
1940 
// Rows 1..6 and channel counts 1, 16, ..., 76 with output_stride(83) and iterations(1).
TEST(F32_PRELU__AVX_2X16, output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t row_count = 1; row_count < 7; ++row_count) {
    for (size_t channel_count = 1; channel_count < 81; channel_count += 15) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).output_stride(83).iterations(1)
          .Test(xnn_f32_prelu_ukernel__avx_2x16);
    }
  }
}
1954 
// Rows 1..6 and channel counts 1, 16, ..., 76 with inplace(true) and iterations(1).
TEST(F32_PRELU__AVX_2X16, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t row_count = 1; row_count < 7; ++row_count) {
    for (size_t channel_count = 1; channel_count < 81; channel_count += 15) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__avx_2x16);
    }
  }
}
1968 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1969 
1970 
1971 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single case: 2 rows x 16 channels, the sizes that appear in the kernel name.
TEST(F32_PRELU__AVX512F_2X16, channels_eq_16) {
  TEST_REQUIRES_X86_AVX512F;
  PReLUMicrokernelTester().rows(2).channels(16).Test(xnn_f32_prelu_ukernel__avx512f_2x16);
}
1979 
// Channel counts 32, 48, ..., 144 (2 to 9 times 16) with 2 rows.
TEST(F32_PRELU__AVX512F_2X16, channels_div_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    const size_t channel_count = multiple * 16;
    PReLUMicrokernelTester().rows(2).channels(channel_count)
        .Test(xnn_f32_prelu_ukernel__avx512f_2x16);
  }
}
1989 
// Channel counts 1 through 15 with 2 rows.
TEST(F32_PRELU__AVX512F_2X16, channels_lt_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t channel_count = 1; channel_count <= 15; ++channel_count) {
    PReLUMicrokernelTester().rows(2).channels(channel_count)
        .Test(xnn_f32_prelu_ukernel__avx512f_2x16);
  }
}
1999 
// Channel counts 17 through 31 with 2 rows.
TEST(F32_PRELU__AVX512F_2X16, channels_gt_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t channel_count = 17; channel_count <= 31; ++channel_count) {
    PReLUMicrokernelTester().rows(2).channels(channel_count)
        .Test(xnn_f32_prelu_ukernel__avx512f_2x16);
  }
}
2009 
// One row only, with channel counts 1, 16, ..., 76.
TEST(F32_PRELU__AVX512F_2X16, rows_lt_2) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t row_count = 1; row_count <= 1; ++row_count) {
    for (size_t channel_count = 1; channel_count < 81; channel_count += 15) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count)
          .Test(xnn_f32_prelu_ukernel__avx512f_2x16);
    }
  }
}
2021 
// Even row counts 4, 6, 8, each with channel counts 1, 16, ..., 76.
TEST(F32_PRELU__AVX512F_2X16, rows_div_2) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t row_count = 4; row_count < 10; row_count += 2) {
    for (size_t channel_count = 1; channel_count < 81; channel_count += 15) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count)
          .Test(xnn_f32_prelu_ukernel__avx512f_2x16);
    }
  }
}
2033 
// Three rows, with channel counts 1, 16, ..., 76.
TEST(F32_PRELU__AVX512F_2X16, rows_gt_2) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t row_count = 3; row_count <= 3; ++row_count) {
    for (size_t channel_count = 1; channel_count < 81; channel_count += 15) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count)
          .Test(xnn_f32_prelu_ukernel__avx512f_2x16);
    }
  }
}
2045 
// Rows 1..6 and channel counts 1, 16, ..., 76 with input_stride(83) and iterations(1).
TEST(F32_PRELU__AVX512F_2X16, input_stride) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t row_count = 1; row_count < 7; ++row_count) {
    for (size_t channel_count = 1; channel_count < 81; channel_count += 15) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).input_stride(83).iterations(1)
          .Test(xnn_f32_prelu_ukernel__avx512f_2x16);
    }
  }
}
2059 
// Rows 1..6 and channel counts 1, 16, ..., 76 with output_stride(83) and iterations(1).
TEST(F32_PRELU__AVX512F_2X16, output_stride) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t row_count = 1; row_count < 7; ++row_count) {
    for (size_t channel_count = 1; channel_count < 81; channel_count += 15) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).output_stride(83).iterations(1)
          .Test(xnn_f32_prelu_ukernel__avx512f_2x16);
    }
  }
}
2073 
// Rows 1..6 and channel counts 1, 16, ..., 76 with inplace(true) and iterations(1).
TEST(F32_PRELU__AVX512F_2X16, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t row_count = 1; row_count < 7; ++row_count) {
    for (size_t channel_count = 1; channel_count < 81; channel_count += 15) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__avx512f_2x16);
    }
  }
}
2087 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2088 
2089 
2090 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single case: 2 rows x 32 channels, the sizes that appear in the kernel name.
TEST(F32_PRELU__AVX512F_2X32, channels_eq_32) {
  TEST_REQUIRES_X86_AVX512F;
  PReLUMicrokernelTester().rows(2).channels(32).Test(xnn_f32_prelu_ukernel__avx512f_2x32);
}
2098 
// Channel counts 64, 96, ..., 288 (2 to 9 times 32) with 2 rows.
TEST(F32_PRELU__AVX512F_2X32, channels_div_32) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    const size_t channel_count = multiple * 32;
    PReLUMicrokernelTester().rows(2).channels(channel_count)
        .Test(xnn_f32_prelu_ukernel__avx512f_2x32);
  }
}
2108 
// Channel counts 1 through 31 with 2 rows.
TEST(F32_PRELU__AVX512F_2X32, channels_lt_32) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t channel_count = 1; channel_count <= 31; ++channel_count) {
    PReLUMicrokernelTester().rows(2).channels(channel_count)
        .Test(xnn_f32_prelu_ukernel__avx512f_2x32);
  }
}
2118 
// Channel counts 33 through 63 with 2 rows.
TEST(F32_PRELU__AVX512F_2X32, channels_gt_32) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t channel_count = 33; channel_count <= 63; ++channel_count) {
    PReLUMicrokernelTester().rows(2).channels(channel_count)
        .Test(xnn_f32_prelu_ukernel__avx512f_2x32);
  }
}
2128 
// One row only, with channel counts 1, 32, ..., 156.
TEST(F32_PRELU__AVX512F_2X32, rows_lt_2) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t row_count = 1; row_count <= 1; ++row_count) {
    for (size_t channel_count = 1; channel_count < 161; channel_count += 31) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count)
          .Test(xnn_f32_prelu_ukernel__avx512f_2x32);
    }
  }
}
2140 
// Even row counts 4, 6, 8, each with channel counts 1, 32, ..., 156.
TEST(F32_PRELU__AVX512F_2X32, rows_div_2) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t row_count = 4; row_count < 10; row_count += 2) {
    for (size_t channel_count = 1; channel_count < 161; channel_count += 31) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count)
          .Test(xnn_f32_prelu_ukernel__avx512f_2x32);
    }
  }
}
2152 
// Three rows, with channel counts 1, 32, ..., 156.
TEST(F32_PRELU__AVX512F_2X32, rows_gt_2) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t row_count = 3; row_count <= 3; ++row_count) {
    for (size_t channel_count = 1; channel_count < 161; channel_count += 31) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count)
          .Test(xnn_f32_prelu_ukernel__avx512f_2x32);
    }
  }
}
2164 
// Rows 1..6 and channel counts 1, 32, ..., 156 with input_stride(163) and iterations(1).
TEST(F32_PRELU__AVX512F_2X32, input_stride) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t row_count = 1; row_count < 7; ++row_count) {
    for (size_t channel_count = 1; channel_count < 161; channel_count += 31) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).input_stride(163).iterations(1)
          .Test(xnn_f32_prelu_ukernel__avx512f_2x32);
    }
  }
}
2178 
// Rows 1..6 and channel counts 1, 32, ..., 156 with output_stride(163) and iterations(1).
TEST(F32_PRELU__AVX512F_2X32, output_stride) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t row_count = 1; row_count < 7; ++row_count) {
    for (size_t channel_count = 1; channel_count < 161; channel_count += 31) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).output_stride(163).iterations(1)
          .Test(xnn_f32_prelu_ukernel__avx512f_2x32);
    }
  }
}
2192 
// Rows 1..6 and channel counts 1, 32, ..., 156 with inplace(true) and iterations(1).
TEST(F32_PRELU__AVX512F_2X32, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t row_count = 1; row_count < 7; ++row_count) {
    for (size_t channel_count = 1; channel_count < 161; channel_count += 31) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__avx512f_2x32);
    }
  }
}
2206 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2207 
2208 
2209 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single case: 1 row x 4 channels, the sizes that appear in the kernel name.
TEST(F32_PRELU__WASMSIMD_BITSELECT_1X4, channels_eq_4) {
  PReLUMicrokernelTester().rows(1).channels(4)
      .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x4);
}
2216 
// Channel counts 8, 12, ..., 36 (2 to 9 times 4) with 1 row.
TEST(F32_PRELU__WASMSIMD_BITSELECT_1X4, channels_div_4) {
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    const size_t channel_count = multiple * 4;
    PReLUMicrokernelTester().rows(1).channels(channel_count)
        .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x4);
  }
}
2225 
// Channel counts 1 through 3 with 1 row.
TEST(F32_PRELU__WASMSIMD_BITSELECT_1X4, channels_lt_4) {
  for (size_t channel_count = 1; channel_count <= 3; ++channel_count) {
    PReLUMicrokernelTester().rows(1).channels(channel_count)
        .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x4);
  }
}
2234 
// Channel counts 5 through 7 with 1 row.
TEST(F32_PRELU__WASMSIMD_BITSELECT_1X4, channels_gt_4) {
  for (size_t channel_count = 5; channel_count <= 7; ++channel_count) {
    PReLUMicrokernelTester().rows(1).channels(channel_count)
        .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x4);
  }
}
2243 
// More than one row (rows = 2 and 3), with channel counts 1, 4, ..., 19.
//
// The previous bounds (rows = 2; rows < 2) gave an empty loop, so this test
// passed without ever invoking the kernel. The upper bound of 3 matches the
// row range used by this kernel's input_stride/output_stride/inplace tests.
//
// NOTE: the file header marks this file as generated by
// tools/generate-prelu-test.py; the same bound has to be corrected there,
// otherwise regenerating the file will bring the empty loop back.
TEST(F32_PRELU__WASMSIMD_BITSELECT_1X4, rows_gt_1) {
  for (size_t rows = 2; rows <= 3; rows++) {
    for (size_t channels = 1; channels <= 20; channels += 3) {
      PReLUMicrokernelTester()
        .rows(rows)
        .channels(channels)
        .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x4);
    }
  }
}
2254 
// Rows 1..3 and channel counts 1, 4, ..., 19 with input_stride(23) and iterations(1).
TEST(F32_PRELU__WASMSIMD_BITSELECT_1X4, input_stride) {
  for (size_t row_count = 1; row_count < 4; ++row_count) {
    for (size_t channel_count = 1; channel_count < 21; channel_count += 3) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).input_stride(23).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x4);
    }
  }
}
2267 
// Rows 1..3 and channel counts 1, 4, ..., 19 with output_stride(23) and iterations(1).
TEST(F32_PRELU__WASMSIMD_BITSELECT_1X4, output_stride) {
  for (size_t row_count = 1; row_count < 4; ++row_count) {
    for (size_t channel_count = 1; channel_count < 21; channel_count += 3) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).output_stride(23).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x4);
    }
  }
}
2280 
// Rows 1..3 and channel counts 1, 4, ..., 19 with inplace(true) and iterations(1).
TEST(F32_PRELU__WASMSIMD_BITSELECT_1X4, inplace) {
  for (size_t row_count = 1; row_count < 4; ++row_count) {
    for (size_t channel_count = 1; channel_count < 21; channel_count += 3) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x4);
    }
  }
}
2293 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2294 
2295 
2296 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single case: 1 row x 8 channels, the sizes that appear in the kernel name.
TEST(F32_PRELU__WASMSIMD_BITSELECT_1X8, channels_eq_8) {
  PReLUMicrokernelTester().rows(1).channels(8)
      .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x8);
}
2303 
// Channel counts 16, 24, ..., 72 (2 to 9 times 8) with 1 row.
TEST(F32_PRELU__WASMSIMD_BITSELECT_1X8, channels_div_8) {
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    const size_t channel_count = multiple * 8;
    PReLUMicrokernelTester().rows(1).channels(channel_count)
        .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x8);
  }
}
2312 
// Channel counts 1 through 7 with 1 row.
TEST(F32_PRELU__WASMSIMD_BITSELECT_1X8, channels_lt_8) {
  for (size_t channel_count = 1; channel_count <= 7; ++channel_count) {
    PReLUMicrokernelTester().rows(1).channels(channel_count)
        .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x8);
  }
}
2321 
// Channel counts 9 through 15 with 1 row.
TEST(F32_PRELU__WASMSIMD_BITSELECT_1X8, channels_gt_8) {
  for (size_t channel_count = 9; channel_count <= 15; ++channel_count) {
    PReLUMicrokernelTester().rows(1).channels(channel_count)
        .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x8);
  }
}
2330 
// More than one row (rows = 2 and 3), with channel counts 1, 8, ..., 36.
//
// The previous bounds (rows = 2; rows < 2) gave an empty loop, so this test
// passed without ever invoking the kernel. The upper bound of 3 matches the
// row range used by this kernel's input_stride/output_stride/inplace tests.
//
// NOTE: the file header marks this file as generated by
// tools/generate-prelu-test.py; the same bound has to be corrected there,
// otherwise regenerating the file will bring the empty loop back.
TEST(F32_PRELU__WASMSIMD_BITSELECT_1X8, rows_gt_1) {
  for (size_t rows = 2; rows <= 3; rows++) {
    for (size_t channels = 1; channels <= 40; channels += 7) {
      PReLUMicrokernelTester()
        .rows(rows)
        .channels(channels)
        .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x8);
    }
  }
}
2341 
// Rows 1..3 and channel counts 1, 8, ..., 36 with input_stride(43) and iterations(1).
TEST(F32_PRELU__WASMSIMD_BITSELECT_1X8, input_stride) {
  for (size_t row_count = 1; row_count < 4; ++row_count) {
    for (size_t channel_count = 1; channel_count < 41; channel_count += 7) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).input_stride(43).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x8);
    }
  }
}
2354 
// Rows 1..3 and channel counts 1, 8, ..., 36 with output_stride(43) and iterations(1).
TEST(F32_PRELU__WASMSIMD_BITSELECT_1X8, output_stride) {
  for (size_t row_count = 1; row_count < 4; ++row_count) {
    for (size_t channel_count = 1; channel_count < 41; channel_count += 7) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).output_stride(43).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x8);
    }
  }
}
2367 
// Rows 1..3 and channel counts 1, 8, ..., 36 with inplace(true) and iterations(1).
TEST(F32_PRELU__WASMSIMD_BITSELECT_1X8, inplace) {
  for (size_t row_count = 1; row_count < 4; ++row_count) {
    for (size_t channel_count = 1; channel_count < 41; channel_count += 7) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x8);
    }
  }
}
2380 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2381 
2382 
2383 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single case: 1 row x 16 channels, the sizes that appear in the kernel name.
TEST(F32_PRELU__WASMSIMD_BITSELECT_1X16, channels_eq_16) {
  PReLUMicrokernelTester().rows(1).channels(16)
      .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x16);
}
2390 
// Channel counts 32, 48, ..., 144 (2 to 9 times 16) with 1 row.
TEST(F32_PRELU__WASMSIMD_BITSELECT_1X16, channels_div_16) {
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    const size_t channel_count = multiple * 16;
    PReLUMicrokernelTester().rows(1).channels(channel_count)
        .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x16);
  }
}
2399 
// Channel counts 1 through 15 with 1 row.
TEST(F32_PRELU__WASMSIMD_BITSELECT_1X16, channels_lt_16) {
  for (size_t channel_count = 1; channel_count <= 15; ++channel_count) {
    PReLUMicrokernelTester().rows(1).channels(channel_count)
        .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x16);
  }
}
2408 
// Channel counts 17 through 31 with 1 row.
TEST(F32_PRELU__WASMSIMD_BITSELECT_1X16, channels_gt_16) {
  for (size_t channel_count = 17; channel_count <= 31; ++channel_count) {
    PReLUMicrokernelTester().rows(1).channels(channel_count)
        .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x16);
  }
}
2417 
// More than one row (rows = 2 and 3), with channel counts 1, 16, ..., 76.
//
// The previous bounds (rows = 2; rows < 2) gave an empty loop, so this test
// passed without ever invoking the kernel. The upper bound of 3 matches the
// row range used by this kernel's input_stride/output_stride/inplace tests.
//
// NOTE: the file header marks this file as generated by
// tools/generate-prelu-test.py; the same bound has to be corrected there,
// otherwise regenerating the file will bring the empty loop back.
TEST(F32_PRELU__WASMSIMD_BITSELECT_1X16, rows_gt_1) {
  for (size_t rows = 2; rows <= 3; rows++) {
    for (size_t channels = 1; channels <= 80; channels += 15) {
      PReLUMicrokernelTester()
        .rows(rows)
        .channels(channels)
        .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x16);
    }
  }
}
2428 
// Rows 1..3 and channel counts 1, 16, ..., 76 with input_stride(83) and iterations(1).
TEST(F32_PRELU__WASMSIMD_BITSELECT_1X16, input_stride) {
  for (size_t row_count = 1; row_count < 4; ++row_count) {
    for (size_t channel_count = 1; channel_count < 81; channel_count += 15) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).input_stride(83).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x16);
    }
  }
}
2441 
// Rows 1..3 and channel counts 1, 16, ..., 76 with output_stride(83) and iterations(1).
TEST(F32_PRELU__WASMSIMD_BITSELECT_1X16, output_stride) {
  for (size_t row_count = 1; row_count < 4; ++row_count) {
    for (size_t channel_count = 1; channel_count < 81; channel_count += 15) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).output_stride(83).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x16);
    }
  }
}
2454 
// Rows 1..3 and channel counts 1, 16, ..., 76 with inplace(true) and iterations(1).
TEST(F32_PRELU__WASMSIMD_BITSELECT_1X16, inplace) {
  for (size_t row_count = 1; row_count < 4; ++row_count) {
    for (size_t channel_count = 1; channel_count < 81; channel_count += 15) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x16);
    }
  }
}
2467 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2468 
2469 
2470 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single case: 2 rows x 4 channels, the sizes that appear in the kernel name.
TEST(F32_PRELU__WASMSIMD_BITSELECT_2X4, channels_eq_4) {
  PReLUMicrokernelTester().rows(2).channels(4)
      .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4);
}
2477 
// Channel counts 8, 12, ..., 36 (2 to 9 times 4) with 2 rows.
TEST(F32_PRELU__WASMSIMD_BITSELECT_2X4, channels_div_4) {
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    const size_t channel_count = multiple * 4;
    PReLUMicrokernelTester().rows(2).channels(channel_count)
        .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4);
  }
}
2486 
// Channel counts 1 through 3 with 2 rows.
TEST(F32_PRELU__WASMSIMD_BITSELECT_2X4, channels_lt_4) {
  for (size_t channel_count = 1; channel_count <= 3; ++channel_count) {
    PReLUMicrokernelTester().rows(2).channels(channel_count)
        .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4);
  }
}
2495 
// Channel counts 5 through 7 with 2 rows.
TEST(F32_PRELU__WASMSIMD_BITSELECT_2X4, channels_gt_4) {
  for (size_t channel_count = 5; channel_count <= 7; ++channel_count) {
    PReLUMicrokernelTester().rows(2).channels(channel_count)
        .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4);
  }
}
2504 
// One row only, with channel counts 1, 4, ..., 19.
TEST(F32_PRELU__WASMSIMD_BITSELECT_2X4, rows_lt_2) {
  for (size_t row_count = 1; row_count <= 1; ++row_count) {
    for (size_t channel_count = 1; channel_count < 21; channel_count += 3) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4);
    }
  }
}
2515 
// Even row counts 4, 6, 8, each with channel counts 1, 4, ..., 19.
TEST(F32_PRELU__WASMSIMD_BITSELECT_2X4, rows_div_2) {
  for (size_t row_count = 4; row_count < 10; row_count += 2) {
    for (size_t channel_count = 1; channel_count < 21; channel_count += 3) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4);
    }
  }
}
2526 
// Three rows, with channel counts 1, 4, ..., 19.
TEST(F32_PRELU__WASMSIMD_BITSELECT_2X4, rows_gt_2) {
  for (size_t row_count = 3; row_count <= 3; ++row_count) {
    for (size_t channel_count = 1; channel_count < 21; channel_count += 3) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4);
    }
  }
}
2537 
// Rows 1..6 and channel counts 1, 4, ..., 19 with input_stride(23) and iterations(1).
TEST(F32_PRELU__WASMSIMD_BITSELECT_2X4, input_stride) {
  for (size_t row_count = 1; row_count < 7; ++row_count) {
    for (size_t channel_count = 1; channel_count < 21; channel_count += 3) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).input_stride(23).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4);
    }
  }
}
2550 
// Rows 1..6 and channel counts 1, 4, ..., 19 with output_stride(23) and iterations(1).
TEST(F32_PRELU__WASMSIMD_BITSELECT_2X4, output_stride) {
  for (size_t row_count = 1; row_count < 7; ++row_count) {
    for (size_t channel_count = 1; channel_count < 21; channel_count += 3) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).output_stride(23).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4);
    }
  }
}
2563 
// Rows 1..6 and channel counts 1, 4, ..., 19 with inplace(true) and iterations(1).
TEST(F32_PRELU__WASMSIMD_BITSELECT_2X4, inplace) {
  for (size_t row_count = 1; row_count < 7; ++row_count) {
    for (size_t channel_count = 1; channel_count < 21; channel_count += 3) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4);
    }
  }
}
2576 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2577 
2578 
2579 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single case: 2 rows x 8 channels, the sizes that appear in the kernel name.
TEST(F32_PRELU__WASMSIMD_BITSELECT_2X8, channels_eq_8) {
  PReLUMicrokernelTester().rows(2).channels(8)
      .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8);
}
2586 
// Channel counts 16, 24, ..., 72 (2 to 9 times 8) with 2 rows.
TEST(F32_PRELU__WASMSIMD_BITSELECT_2X8, channels_div_8) {
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    const size_t channel_count = multiple * 8;
    PReLUMicrokernelTester().rows(2).channels(channel_count)
        .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8);
  }
}
2595 
// Channel counts 1 through 7 with 2 rows.
TEST(F32_PRELU__WASMSIMD_BITSELECT_2X8, channels_lt_8) {
  for (size_t channel_count = 1; channel_count <= 7; ++channel_count) {
    PReLUMicrokernelTester().rows(2).channels(channel_count)
        .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8);
  }
}
2604 
// Channel counts 9 through 15 with 2 rows.
TEST(F32_PRELU__WASMSIMD_BITSELECT_2X8, channels_gt_8) {
  for (size_t channel_count = 9; channel_count <= 15; ++channel_count) {
    PReLUMicrokernelTester().rows(2).channels(channel_count)
        .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8);
  }
}
2613 
// One row only, with channel counts 1, 8, ..., 36.
TEST(F32_PRELU__WASMSIMD_BITSELECT_2X8, rows_lt_2) {
  for (size_t row_count = 1; row_count <= 1; ++row_count) {
    for (size_t channel_count = 1; channel_count < 41; channel_count += 7) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8);
    }
  }
}
2624 
// Even row counts 4, 6, 8, each with channel counts 1, 8, ..., 36.
TEST(F32_PRELU__WASMSIMD_BITSELECT_2X8, rows_div_2) {
  for (size_t row_count = 4; row_count < 10; row_count += 2) {
    for (size_t channel_count = 1; channel_count < 41; channel_count += 7) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8);
    }
  }
}
2635 
// Three rows, with channel counts 1, 8, ..., 36.
TEST(F32_PRELU__WASMSIMD_BITSELECT_2X8, rows_gt_2) {
  for (size_t row_count = 3; row_count <= 3; ++row_count) {
    for (size_t channel_count = 1; channel_count < 41; channel_count += 7) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8);
    }
  }
}
2646 
// Rows 1..6 and channel counts 1, 8, ..., 36 with input_stride(43) and iterations(1).
TEST(F32_PRELU__WASMSIMD_BITSELECT_2X8, input_stride) {
  for (size_t row_count = 1; row_count < 7; ++row_count) {
    for (size_t channel_count = 1; channel_count < 41; channel_count += 7) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).input_stride(43).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8);
    }
  }
}
2659 
// Rows 1..6 and channel counts 1, 8, ..., 36 with output_stride(43) and iterations(1).
TEST(F32_PRELU__WASMSIMD_BITSELECT_2X8, output_stride) {
  for (size_t row_count = 1; row_count < 7; ++row_count) {
    for (size_t channel_count = 1; channel_count < 41; channel_count += 7) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).output_stride(43).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8);
    }
  }
}
2672 
// rows 1..6 by channels 1, 8, ..., 36, each run with inplace(true) and
// iterations(1).
TEST(F32_PRELU__WASMSIMD_BITSELECT_2X8, inplace) {
  for (size_t num_rows = 1; num_rows < 7; ++num_rows) {
    for (size_t num_channels = 1; num_channels < 41; num_channels += 7) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels)
          .inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8);
    }
  }
}
2685 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2686 
2687 
2688 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single invocation with rows=2 and channels=16.
TEST(F32_PRELU__WASMSIMD_BITSELECT_2X16, channels_eq_16) {
  PReLUMicrokernelTester().rows(2).channels(16).Test(
      xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16);
}
2695 
// rows fixed at 2; channels takes 32, 48, ..., 144 (multiples of 16).
TEST(F32_PRELU__WASMSIMD_BITSELECT_2X16, channels_div_16) {
  for (size_t num_channels = 32; num_channels <= 144; num_channels += 16) {
    PReLUMicrokernelTester().rows(2).channels(num_channels).Test(
        xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16);
  }
}
2704 
// rows fixed at 2; channels takes every value from 1 to 15.
TEST(F32_PRELU__WASMSIMD_BITSELECT_2X16, channels_lt_16) {
  for (size_t num_channels = 1; num_channels <= 15; ++num_channels) {
    PReLUMicrokernelTester().rows(2).channels(num_channels).Test(
        xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16);
  }
}
2713 
// rows fixed at 2; channels takes every value from 17 to 31.
TEST(F32_PRELU__WASMSIMD_BITSELECT_2X16, channels_gt_16) {
  for (size_t num_channels = 17; num_channels <= 31; ++num_channels) {
    PReLUMicrokernelTester().rows(2).channels(num_channels).Test(
        xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16);
  }
}
2722 
// rows takes the single value 1; channels takes 1, 16, ..., 76.
TEST(F32_PRELU__WASMSIMD_BITSELECT_2X16, rows_lt_2) {
  for (size_t num_rows = 1; num_rows <= 1; ++num_rows) {
    for (size_t num_channels = 1; num_channels < 81; num_channels += 15) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels).Test(
          xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16);
    }
  }
}
2733 
// rows takes 4, 6, 8; for each, channels takes 1, 16, ..., 76.
TEST(F32_PRELU__WASMSIMD_BITSELECT_2X16, rows_div_2) {
  for (size_t num_rows = 4; num_rows < 9; num_rows += 2) {
    for (size_t num_channels = 1; num_channels < 81; num_channels += 15) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels).Test(
          xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16);
    }
  }
}
2744 
// rows takes the single value 3; channels takes 1, 16, ..., 76.
TEST(F32_PRELU__WASMSIMD_BITSELECT_2X16, rows_gt_2) {
  for (size_t num_rows = 3; num_rows <= 3; ++num_rows) {
    for (size_t num_channels = 1; num_channels < 81; num_channels += 15) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels).Test(
          xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16);
    }
  }
}
2755 
// rows 1..6 by channels 1, 16, ..., 76, each run with input_stride(83) and
// iterations(1).
TEST(F32_PRELU__WASMSIMD_BITSELECT_2X16, input_stride) {
  for (size_t num_rows = 1; num_rows < 7; ++num_rows) {
    for (size_t num_channels = 1; num_channels < 81; num_channels += 15) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels)
          .input_stride(83).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16);
    }
  }
}
2768 
// rows 1..6 by channels 1, 16, ..., 76, each run with output_stride(83) and
// iterations(1).
TEST(F32_PRELU__WASMSIMD_BITSELECT_2X16, output_stride) {
  for (size_t num_rows = 1; num_rows < 7; ++num_rows) {
    for (size_t num_channels = 1; num_channels < 81; num_channels += 15) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels)
          .output_stride(83).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16);
    }
  }
}
2781 
// rows 1..6 by channels 1, 16, ..., 76, each run with inplace(true) and
// iterations(1).
TEST(F32_PRELU__WASMSIMD_BITSELECT_2X16, inplace) {
  for (size_t num_rows = 1; num_rows < 7; ++num_rows) {
    for (size_t num_channels = 1; num_channels < 81; num_channels += 15) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels)
          .inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16);
    }
  }
}
2794 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2795 
2796 
2797 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single invocation with rows=4 and channels=4.
TEST(F32_PRELU__WASMSIMD_BITSELECT_4X4, channels_eq_4) {
  PReLUMicrokernelTester().rows(4).channels(4).Test(
      xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4);
}
2804 
// rows fixed at 4; channels takes 8, 12, ..., 36 (multiples of 4).
TEST(F32_PRELU__WASMSIMD_BITSELECT_4X4, channels_div_4) {
  for (size_t num_channels = 8; num_channels <= 36; num_channels += 4) {
    PReLUMicrokernelTester().rows(4).channels(num_channels).Test(
        xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4);
  }
}
2813 
// rows fixed at 4; channels takes every value from 1 to 3.
TEST(F32_PRELU__WASMSIMD_BITSELECT_4X4, channels_lt_4) {
  for (size_t num_channels = 1; num_channels <= 3; ++num_channels) {
    PReLUMicrokernelTester().rows(4).channels(num_channels).Test(
        xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4);
  }
}
2822 
// rows fixed at 4; channels takes every value from 5 to 7.
TEST(F32_PRELU__WASMSIMD_BITSELECT_4X4, channels_gt_4) {
  for (size_t num_channels = 5; num_channels <= 7; ++num_channels) {
    PReLUMicrokernelTester().rows(4).channels(num_channels).Test(
        xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4);
  }
}
2831 
// rows takes 1, 2, 3; for each, channels takes 1, 4, ..., 19.
TEST(F32_PRELU__WASMSIMD_BITSELECT_4X4, rows_lt_4) {
  for (size_t num_rows = 1; num_rows <= 3; ++num_rows) {
    for (size_t num_channels = 1; num_channels < 21; num_channels += 3) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels).Test(
          xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4);
    }
  }
}
2842 
// rows takes 8, 12, 16; for each, channels takes 1, 4, ..., 19.
TEST(F32_PRELU__WASMSIMD_BITSELECT_4X4, rows_div_4) {
  for (size_t num_rows = 8; num_rows < 17; num_rows += 4) {
    for (size_t num_channels = 1; num_channels < 21; num_channels += 3) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels).Test(
          xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4);
    }
  }
}
2853 
// rows takes 5, 6, 7; for each, channels takes 1, 4, ..., 19.
TEST(F32_PRELU__WASMSIMD_BITSELECT_4X4, rows_gt_4) {
  for (size_t num_rows = 5; num_rows <= 7; ++num_rows) {
    for (size_t num_channels = 1; num_channels < 21; num_channels += 3) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels).Test(
          xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4);
    }
  }
}
2864 
// rows 1, 4, 7, 10 by channels 1, 4, ..., 19, each run with input_stride(23)
// and iterations(1).
TEST(F32_PRELU__WASMSIMD_BITSELECT_4X4, input_stride) {
  for (size_t num_rows = 1; num_rows < 13; num_rows += 3) {
    for (size_t num_channels = 1; num_channels < 21; num_channels += 3) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels)
          .input_stride(23).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4);
    }
  }
}
2877 
// rows 1, 4, 7, 10 by channels 1, 4, ..., 19, each run with output_stride(23)
// and iterations(1).
TEST(F32_PRELU__WASMSIMD_BITSELECT_4X4, output_stride) {
  for (size_t num_rows = 1; num_rows < 13; num_rows += 3) {
    for (size_t num_channels = 1; num_channels < 21; num_channels += 3) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels)
          .output_stride(23).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4);
    }
  }
}
2890 
// rows 1, 4, 7, 10 by channels 1, 4, ..., 19, each run with inplace(true)
// and iterations(1).
TEST(F32_PRELU__WASMSIMD_BITSELECT_4X4, inplace) {
  for (size_t num_rows = 1; num_rows < 13; num_rows += 3) {
    for (size_t num_channels = 1; num_channels < 21; num_channels += 3) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels)
          .inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4);
    }
  }
}
2903 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2904 
2905 
2906 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single invocation with rows=4 and channels=8.
TEST(F32_PRELU__WASMSIMD_BITSELECT_4X8, channels_eq_8) {
  PReLUMicrokernelTester().rows(4).channels(8).Test(
      xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8);
}
2913 
// rows fixed at 4; channels takes 16, 24, ..., 72 (multiples of 8).
TEST(F32_PRELU__WASMSIMD_BITSELECT_4X8, channels_div_8) {
  for (size_t num_channels = 16; num_channels <= 72; num_channels += 8) {
    PReLUMicrokernelTester().rows(4).channels(num_channels).Test(
        xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8);
  }
}
2922 
// rows fixed at 4; channels takes every value from 1 to 7.
TEST(F32_PRELU__WASMSIMD_BITSELECT_4X8, channels_lt_8) {
  for (size_t num_channels = 1; num_channels <= 7; ++num_channels) {
    PReLUMicrokernelTester().rows(4).channels(num_channels).Test(
        xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8);
  }
}
2931 
// rows fixed at 4; channels takes every value from 9 to 15.
TEST(F32_PRELU__WASMSIMD_BITSELECT_4X8, channels_gt_8) {
  for (size_t num_channels = 9; num_channels <= 15; ++num_channels) {
    PReLUMicrokernelTester().rows(4).channels(num_channels).Test(
        xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8);
  }
}
2940 
// rows takes 1, 2, 3; for each, channels takes 1, 8, ..., 36.
TEST(F32_PRELU__WASMSIMD_BITSELECT_4X8, rows_lt_4) {
  for (size_t num_rows = 1; num_rows <= 3; ++num_rows) {
    for (size_t num_channels = 1; num_channels < 41; num_channels += 7) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels).Test(
          xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8);
    }
  }
}
2951 
// rows takes 8, 12, 16; for each, channels takes 1, 8, ..., 36.
TEST(F32_PRELU__WASMSIMD_BITSELECT_4X8, rows_div_4) {
  for (size_t num_rows = 8; num_rows < 17; num_rows += 4) {
    for (size_t num_channels = 1; num_channels < 41; num_channels += 7) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels).Test(
          xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8);
    }
  }
}
2962 
// rows takes 5, 6, 7; for each, channels takes 1, 8, ..., 36.
TEST(F32_PRELU__WASMSIMD_BITSELECT_4X8, rows_gt_4) {
  for (size_t num_rows = 5; num_rows <= 7; ++num_rows) {
    for (size_t num_channels = 1; num_channels < 41; num_channels += 7) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels).Test(
          xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8);
    }
  }
}
2973 
// rows 1, 4, 7, 10 by channels 1, 8, ..., 36, each run with input_stride(43)
// and iterations(1).
TEST(F32_PRELU__WASMSIMD_BITSELECT_4X8, input_stride) {
  for (size_t num_rows = 1; num_rows < 13; num_rows += 3) {
    for (size_t num_channels = 1; num_channels < 41; num_channels += 7) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels)
          .input_stride(43).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8);
    }
  }
}
2986 
// rows 1, 4, 7, 10 by channels 1, 8, ..., 36, each run with output_stride(43)
// and iterations(1).
TEST(F32_PRELU__WASMSIMD_BITSELECT_4X8, output_stride) {
  for (size_t num_rows = 1; num_rows < 13; num_rows += 3) {
    for (size_t num_channels = 1; num_channels < 41; num_channels += 7) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels)
          .output_stride(43).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8);
    }
  }
}
2999 
// rows 1, 4, 7, 10 by channels 1, 8, ..., 36, each run with inplace(true)
// and iterations(1).
TEST(F32_PRELU__WASMSIMD_BITSELECT_4X8, inplace) {
  for (size_t num_rows = 1; num_rows < 13; num_rows += 3) {
    for (size_t num_channels = 1; num_channels < 41; num_channels += 7) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels)
          .inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8);
    }
  }
}
3012 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3013 
3014 
3015 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single invocation with rows=4 and channels=16.
TEST(F32_PRELU__WASMSIMD_BITSELECT_4X16, channels_eq_16) {
  PReLUMicrokernelTester().rows(4).channels(16).Test(
      xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16);
}
3022 
// rows fixed at 4; channels takes 32, 48, ..., 144 (multiples of 16).
TEST(F32_PRELU__WASMSIMD_BITSELECT_4X16, channels_div_16) {
  for (size_t num_channels = 32; num_channels <= 144; num_channels += 16) {
    PReLUMicrokernelTester().rows(4).channels(num_channels).Test(
        xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16);
  }
}
3031 
// rows fixed at 4; channels takes every value from 1 to 15.
TEST(F32_PRELU__WASMSIMD_BITSELECT_4X16, channels_lt_16) {
  for (size_t num_channels = 1; num_channels <= 15; ++num_channels) {
    PReLUMicrokernelTester().rows(4).channels(num_channels).Test(
        xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16);
  }
}
3040 
// rows fixed at 4; channels takes every value from 17 to 31.
TEST(F32_PRELU__WASMSIMD_BITSELECT_4X16, channels_gt_16) {
  for (size_t num_channels = 17; num_channels <= 31; ++num_channels) {
    PReLUMicrokernelTester().rows(4).channels(num_channels).Test(
        xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16);
  }
}
3049 
// rows takes 1, 2, 3; for each, channels takes 1, 16, ..., 76.
TEST(F32_PRELU__WASMSIMD_BITSELECT_4X16, rows_lt_4) {
  for (size_t num_rows = 1; num_rows <= 3; ++num_rows) {
    for (size_t num_channels = 1; num_channels < 81; num_channels += 15) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels).Test(
          xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16);
    }
  }
}
3060 
// rows takes 8, 12, 16; for each, channels takes 1, 16, ..., 76.
TEST(F32_PRELU__WASMSIMD_BITSELECT_4X16, rows_div_4) {
  for (size_t num_rows = 8; num_rows < 17; num_rows += 4) {
    for (size_t num_channels = 1; num_channels < 81; num_channels += 15) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels).Test(
          xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16);
    }
  }
}
3071 
// rows takes 5, 6, 7; for each, channels takes 1, 16, ..., 76.
TEST(F32_PRELU__WASMSIMD_BITSELECT_4X16, rows_gt_4) {
  for (size_t num_rows = 5; num_rows <= 7; ++num_rows) {
    for (size_t num_channels = 1; num_channels < 81; num_channels += 15) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels).Test(
          xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16);
    }
  }
}
3082 
// rows 1, 4, 7, 10 by channels 1, 16, ..., 76, each run with input_stride(83)
// and iterations(1).
TEST(F32_PRELU__WASMSIMD_BITSELECT_4X16, input_stride) {
  for (size_t num_rows = 1; num_rows < 13; num_rows += 3) {
    for (size_t num_channels = 1; num_channels < 81; num_channels += 15) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels)
          .input_stride(83).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16);
    }
  }
}
3095 
// rows 1, 4, 7, 10 by channels 1, 16, ..., 76, each run with
// output_stride(83) and iterations(1).
TEST(F32_PRELU__WASMSIMD_BITSELECT_4X16, output_stride) {
  for (size_t num_rows = 1; num_rows < 13; num_rows += 3) {
    for (size_t num_channels = 1; num_channels < 81; num_channels += 15) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels)
          .output_stride(83).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16);
    }
  }
}
3108 
// rows 1, 4, 7, 10 by channels 1, 16, ..., 76, each run with inplace(true)
// and iterations(1).
TEST(F32_PRELU__WASMSIMD_BITSELECT_4X16, inplace) {
  for (size_t num_rows = 1; num_rows < 13; num_rows += 3) {
    for (size_t num_channels = 1; num_channels < 81; num_channels += 15) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels)
          .inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16);
    }
  }
}
3121 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3122 
3123 
3124 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single invocation with rows=1 and channels=4.
TEST(F32_PRELU__WASMSIMD_MINMAX_1X4, channels_eq_4) {
  PReLUMicrokernelTester().rows(1).channels(4).Test(
      xnn_f32_prelu_ukernel__wasmsimd_minmax_1x4);
}
3131 
// rows fixed at 1; channels takes 8, 12, ..., 36 (multiples of 4).
TEST(F32_PRELU__WASMSIMD_MINMAX_1X4, channels_div_4) {
  for (size_t num_channels = 8; num_channels <= 36; num_channels += 4) {
    PReLUMicrokernelTester().rows(1).channels(num_channels).Test(
        xnn_f32_prelu_ukernel__wasmsimd_minmax_1x4);
  }
}
3140 
// rows fixed at 1; channels takes every value from 1 to 3.
TEST(F32_PRELU__WASMSIMD_MINMAX_1X4, channels_lt_4) {
  for (size_t num_channels = 1; num_channels <= 3; ++num_channels) {
    PReLUMicrokernelTester().rows(1).channels(num_channels).Test(
        xnn_f32_prelu_ukernel__wasmsimd_minmax_1x4);
  }
}
3149 
// rows fixed at 1; channels takes every value from 5 to 7.
TEST(F32_PRELU__WASMSIMD_MINMAX_1X4, channels_gt_4) {
  for (size_t num_channels = 5; num_channels <= 7; ++num_channels) {
    PReLUMicrokernelTester().rows(1).channels(num_channels).Test(
        xnn_f32_prelu_ukernel__wasmsimd_minmax_1x4);
  }
}
3158 
// Runs the kernel with more than one row: rows takes 2 and 3, and for each,
// channels takes 1, 4, ..., 19.
//
// The row loop used to be `rows = 2; rows < 2`, which never executes, so the
// test passed without ever calling the kernel. The upper limit of 3 matches
// the row range used by this kernel's input_stride/output_stride/inplace
// tests.
//
// NOTE(review): this file is generated by tools/generate-prelu-test.py; the
// same bound needs to change there or regeneration will restore the empty
// loop.
TEST(F32_PRELU__WASMSIMD_MINMAX_1X4, rows_gt_1) {
  for (size_t rows = 2; rows <= 3; rows++) {
    for (size_t channels = 1; channels <= 20; channels += 3) {
      PReLUMicrokernelTester()
        .rows(rows)
        .channels(channels)
        .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x4);
    }
  }
}
3169 
// rows 1..3 by channels 1, 4, ..., 19, each run with input_stride(23) and
// iterations(1).
TEST(F32_PRELU__WASMSIMD_MINMAX_1X4, input_stride) {
  for (size_t num_rows = 1; num_rows < 4; ++num_rows) {
    for (size_t num_channels = 1; num_channels < 21; num_channels += 3) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels)
          .input_stride(23).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x4);
    }
  }
}
3182 
// rows 1..3 by channels 1, 4, ..., 19, each run with output_stride(23) and
// iterations(1).
TEST(F32_PRELU__WASMSIMD_MINMAX_1X4, output_stride) {
  for (size_t num_rows = 1; num_rows < 4; ++num_rows) {
    for (size_t num_channels = 1; num_channels < 21; num_channels += 3) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels)
          .output_stride(23).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x4);
    }
  }
}
3195 
// rows 1..3 by channels 1, 4, ..., 19, each run with inplace(true) and
// iterations(1).
TEST(F32_PRELU__WASMSIMD_MINMAX_1X4, inplace) {
  for (size_t num_rows = 1; num_rows < 4; ++num_rows) {
    for (size_t num_channels = 1; num_channels < 21; num_channels += 3) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels)
          .inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x4);
    }
  }
}
3208 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3209 
3210 
3211 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single invocation with rows=1 and channels=8.
TEST(F32_PRELU__WASMSIMD_MINMAX_1X8, channels_eq_8) {
  PReLUMicrokernelTester().rows(1).channels(8).Test(
      xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8);
}
3218 
// rows fixed at 1; channels takes 16, 24, ..., 72 (multiples of 8).
TEST(F32_PRELU__WASMSIMD_MINMAX_1X8, channels_div_8) {
  for (size_t num_channels = 16; num_channels <= 72; num_channels += 8) {
    PReLUMicrokernelTester().rows(1).channels(num_channels).Test(
        xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8);
  }
}
3227 
// rows fixed at 1; channels takes every value from 1 to 7.
TEST(F32_PRELU__WASMSIMD_MINMAX_1X8, channels_lt_8) {
  for (size_t num_channels = 1; num_channels <= 7; ++num_channels) {
    PReLUMicrokernelTester().rows(1).channels(num_channels).Test(
        xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8);
  }
}
3236 
// rows fixed at 1; channels takes every value from 9 to 15.
TEST(F32_PRELU__WASMSIMD_MINMAX_1X8, channels_gt_8) {
  for (size_t num_channels = 9; num_channels <= 15; ++num_channels) {
    PReLUMicrokernelTester().rows(1).channels(num_channels).Test(
        xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8);
  }
}
3245 
// Runs the kernel with more than one row: rows takes 2 and 3, and for each,
// channels takes 1, 8, ..., 36.
//
// The row loop used to be `rows = 2; rows < 2`, which never executes, so the
// test passed without ever calling the kernel. The upper limit of 3 matches
// the row range used by this kernel's input_stride/output_stride/inplace
// tests.
//
// NOTE(review): this file is generated by tools/generate-prelu-test.py; the
// same bound needs to change there or regeneration will restore the empty
// loop.
TEST(F32_PRELU__WASMSIMD_MINMAX_1X8, rows_gt_1) {
  for (size_t rows = 2; rows <= 3; rows++) {
    for (size_t channels = 1; channels <= 40; channels += 7) {
      PReLUMicrokernelTester()
        .rows(rows)
        .channels(channels)
        .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8);
    }
  }
}
3256 
// rows 1..3 by channels 1, 8, ..., 36, each run with input_stride(43) and
// iterations(1).
TEST(F32_PRELU__WASMSIMD_MINMAX_1X8, input_stride) {
  for (size_t num_rows = 1; num_rows < 4; ++num_rows) {
    for (size_t num_channels = 1; num_channels < 41; num_channels += 7) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels)
          .input_stride(43).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8);
    }
  }
}
3269 
// rows 1..3 by channels 1, 8, ..., 36, each run with output_stride(43) and
// iterations(1).
TEST(F32_PRELU__WASMSIMD_MINMAX_1X8, output_stride) {
  for (size_t num_rows = 1; num_rows < 4; ++num_rows) {
    for (size_t num_channels = 1; num_channels < 41; num_channels += 7) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels)
          .output_stride(43).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8);
    }
  }
}
3282 
// rows 1..3 by channels 1, 8, ..., 36, each run with inplace(true) and
// iterations(1).
TEST(F32_PRELU__WASMSIMD_MINMAX_1X8, inplace) {
  for (size_t num_rows = 1; num_rows < 4; ++num_rows) {
    for (size_t num_channels = 1; num_channels < 41; num_channels += 7) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels)
          .inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8);
    }
  }
}
3295 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3296 
3297 
3298 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single invocation with rows=1 and channels=16.
TEST(F32_PRELU__WASMSIMD_MINMAX_1X16, channels_eq_16) {
  PReLUMicrokernelTester().rows(1).channels(16).Test(
      xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16);
}
3305 
// rows fixed at 1; channels takes 32, 48, ..., 144 (multiples of 16).
TEST(F32_PRELU__WASMSIMD_MINMAX_1X16, channels_div_16) {
  for (size_t num_channels = 32; num_channels <= 144; num_channels += 16) {
    PReLUMicrokernelTester().rows(1).channels(num_channels).Test(
        xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16);
  }
}
3314 
// rows fixed at 1; channels takes every value from 1 to 15.
TEST(F32_PRELU__WASMSIMD_MINMAX_1X16, channels_lt_16) {
  for (size_t num_channels = 1; num_channels <= 15; ++num_channels) {
    PReLUMicrokernelTester().rows(1).channels(num_channels).Test(
        xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16);
  }
}
3323 
// rows fixed at 1; channels takes every value from 17 to 31.
TEST(F32_PRELU__WASMSIMD_MINMAX_1X16, channels_gt_16) {
  for (size_t num_channels = 17; num_channels <= 31; ++num_channels) {
    PReLUMicrokernelTester().rows(1).channels(num_channels).Test(
        xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16);
  }
}
3332 
// Runs the kernel with more than one row: rows takes 2 and 3, and for each,
// channels takes 1, 16, ..., 76.
//
// The row loop used to be `rows = 2; rows < 2`, which never executes, so the
// test passed without ever calling the kernel. The upper limit of 3 matches
// the row range used by this kernel's input_stride/output_stride/inplace
// tests.
//
// NOTE(review): this file is generated by tools/generate-prelu-test.py; the
// same bound needs to change there or regeneration will restore the empty
// loop.
TEST(F32_PRELU__WASMSIMD_MINMAX_1X16, rows_gt_1) {
  for (size_t rows = 2; rows <= 3; rows++) {
    for (size_t channels = 1; channels <= 80; channels += 15) {
      PReLUMicrokernelTester()
        .rows(rows)
        .channels(channels)
        .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16);
    }
  }
}
3343 
// rows 1..3 by channels 1, 16, ..., 76, each run with input_stride(83) and
// iterations(1).
TEST(F32_PRELU__WASMSIMD_MINMAX_1X16, input_stride) {
  for (size_t num_rows = 1; num_rows < 4; ++num_rows) {
    for (size_t num_channels = 1; num_channels < 81; num_channels += 15) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels)
          .input_stride(83).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16);
    }
  }
}
3356 
// rows 1..3 by channels 1, 16, ..., 76, each run with output_stride(83) and
// iterations(1).
TEST(F32_PRELU__WASMSIMD_MINMAX_1X16, output_stride) {
  for (size_t num_rows = 1; num_rows < 4; ++num_rows) {
    for (size_t num_channels = 1; num_channels < 81; num_channels += 15) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels)
          .output_stride(83).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16);
    }
  }
}
3369 
// rows 1..3 by channels 1, 16, ..., 76, each run with inplace(true) and
// iterations(1).
TEST(F32_PRELU__WASMSIMD_MINMAX_1X16, inplace) {
  for (size_t num_rows = 1; num_rows < 4; ++num_rows) {
    for (size_t num_channels = 1; num_channels < 81; num_channels += 15) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels)
          .inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16);
    }
  }
}
3382 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3383 
3384 
3385 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single invocation with rows=2 and channels=4.
TEST(F32_PRELU__WASMSIMD_MINMAX_2X4, channels_eq_4) {
  PReLUMicrokernelTester().rows(2).channels(4).Test(
      xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4);
}
3392 
// rows fixed at 2; channels takes 8, 12, ..., 36 (multiples of 4).
TEST(F32_PRELU__WASMSIMD_MINMAX_2X4, channels_div_4) {
  for (size_t num_channels = 8; num_channels <= 36; num_channels += 4) {
    PReLUMicrokernelTester().rows(2).channels(num_channels).Test(
        xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4);
  }
}
3401 
// rows fixed at 2; channels takes every value from 1 to 3.
TEST(F32_PRELU__WASMSIMD_MINMAX_2X4, channels_lt_4) {
  for (size_t num_channels = 1; num_channels <= 3; ++num_channels) {
    PReLUMicrokernelTester().rows(2).channels(num_channels).Test(
        xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4);
  }
}
3410 
// rows fixed at 2; channels takes every value from 5 to 7.
TEST(F32_PRELU__WASMSIMD_MINMAX_2X4, channels_gt_4) {
  for (size_t num_channels = 5; num_channels <= 7; ++num_channels) {
    PReLUMicrokernelTester().rows(2).channels(num_channels).Test(
        xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4);
  }
}
3419 
// rows takes the single value 1; channels takes 1, 4, ..., 19.
TEST(F32_PRELU__WASMSIMD_MINMAX_2X4, rows_lt_2) {
  for (size_t num_rows = 1; num_rows <= 1; ++num_rows) {
    for (size_t num_channels = 1; num_channels < 21; num_channels += 3) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels).Test(
          xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4);
    }
  }
}
3430 
// rows takes 4, 6, 8; for each, channels takes 1, 4, ..., 19.
TEST(F32_PRELU__WASMSIMD_MINMAX_2X4, rows_div_2) {
  for (size_t num_rows = 4; num_rows < 9; num_rows += 2) {
    for (size_t num_channels = 1; num_channels < 21; num_channels += 3) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels).Test(
          xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4);
    }
  }
}
3441 
// rows takes the single value 3; channels takes 1, 4, ..., 19.
TEST(F32_PRELU__WASMSIMD_MINMAX_2X4, rows_gt_2) {
  for (size_t num_rows = 3; num_rows <= 3; ++num_rows) {
    for (size_t num_channels = 1; num_channels < 21; num_channels += 3) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels).Test(
          xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4);
    }
  }
}
3452 
// rows 1..6 by channels 1, 4, ..., 19, each run with input_stride(23) and
// iterations(1).
TEST(F32_PRELU__WASMSIMD_MINMAX_2X4, input_stride) {
  for (size_t num_rows = 1; num_rows < 7; ++num_rows) {
    for (size_t num_channels = 1; num_channels < 21; num_channels += 3) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels)
          .input_stride(23).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4);
    }
  }
}
3465 
// rows 1..6 by channels 1, 4, ..., 19, each run with output_stride(23) and
// iterations(1).
TEST(F32_PRELU__WASMSIMD_MINMAX_2X4, output_stride) {
  for (size_t num_rows = 1; num_rows < 7; ++num_rows) {
    for (size_t num_channels = 1; num_channels < 21; num_channels += 3) {
      PReLUMicrokernelTester().rows(num_rows).channels(num_channels)
          .output_stride(23).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4);
    }
  }
}
3478 
// Row counts 1..6 crossed with channel counts 1, 4, ..., 19, using
// inplace(true) and iterations(1).
TEST(F32_PRELU__WASMSIMD_MINMAX_2X4, inplace) {
  for (size_t row_count = 1; row_count <= 6; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
      PReLUMicrokernelTester()
          .rows(row_count).channels(channel_count)
          .inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4);
    }
  }
}
3491 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3492 
3493 
3494 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with 2 rows and 8 channels.
TEST(F32_PRELU__WASMSIMD_MINMAX_2X8, channels_eq_8) {
  PReLUMicrokernelTester().rows(2).channels(8).Test(
      xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8);
}
3501 
// 2 rows; channel counts 16, 24, ..., 72 (multiples of 8 below 80).
TEST(F32_PRELU__WASMSIMD_MINMAX_2X8, channels_div_8) {
  for (size_t channel_count = 16; channel_count < 80; channel_count += 8) {
    PReLUMicrokernelTester().rows(2).channels(channel_count).Test(
        xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8);
  }
}
3510 
// 2 rows; every channel count from 1 through 7.
TEST(F32_PRELU__WASMSIMD_MINMAX_2X8, channels_lt_8) {
  for (size_t channel_count = 1; channel_count < 8; ++channel_count) {
    PReLUMicrokernelTester().rows(2).channels(channel_count).Test(
        xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8);
  }
}
3519 
// 2 rows; every channel count from 9 through 15.
TEST(F32_PRELU__WASMSIMD_MINMAX_2X8, channels_gt_8) {
  for (size_t channel_count = 9; channel_count < 16; ++channel_count) {
    PReLUMicrokernelTester().rows(2).channels(channel_count).Test(
        xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8);
  }
}
3528 
// Row count 1 (the loop's only value) crossed with channel counts 1, 8, ..., 36.
TEST(F32_PRELU__WASMSIMD_MINMAX_2X8, rows_lt_2) {
  for (size_t row_count = 1; row_count < 2; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 40; channel_count += 7) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).Test(
          xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8);
    }
  }
}
3539 
// Row counts 4, 6, 8 crossed with channel counts 1, 8, ..., 36.
TEST(F32_PRELU__WASMSIMD_MINMAX_2X8, rows_div_2) {
  for (size_t row_count = 4; row_count <= 8; row_count += 2) {
    for (size_t channel_count = 1; channel_count <= 40; channel_count += 7) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).Test(
          xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8);
    }
  }
}
3550 
// Row count 3 (the loop's only value) crossed with channel counts 1, 8, ..., 36.
TEST(F32_PRELU__WASMSIMD_MINMAX_2X8, rows_gt_2) {
  for (size_t row_count = 3; row_count < 4; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 40; channel_count += 7) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).Test(
          xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8);
    }
  }
}
3561 
// Row counts 1..6 crossed with channel counts 1, 8, ..., 36, using
// input_stride(43) and iterations(1).
TEST(F32_PRELU__WASMSIMD_MINMAX_2X8, input_stride) {
  for (size_t row_count = 1; row_count <= 6; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 40; channel_count += 7) {
      PReLUMicrokernelTester()
          .rows(row_count).channels(channel_count)
          .input_stride(43).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8);
    }
  }
}
3574 
// Row counts 1..6 crossed with channel counts 1, 8, ..., 36, using
// output_stride(43) and iterations(1).
TEST(F32_PRELU__WASMSIMD_MINMAX_2X8, output_stride) {
  for (size_t row_count = 1; row_count <= 6; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 40; channel_count += 7) {
      PReLUMicrokernelTester()
          .rows(row_count).channels(channel_count)
          .output_stride(43).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8);
    }
  }
}
3587 
// Row counts 1..6 crossed with channel counts 1, 8, ..., 36, using
// inplace(true) and iterations(1).
TEST(F32_PRELU__WASMSIMD_MINMAX_2X8, inplace) {
  for (size_t row_count = 1; row_count <= 6; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 40; channel_count += 7) {
      PReLUMicrokernelTester()
          .rows(row_count).channels(channel_count)
          .inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8);
    }
  }
}
3600 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3601 
3602 
3603 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with 2 rows and 16 channels.
TEST(F32_PRELU__WASMSIMD_MINMAX_2X16, channels_eq_16) {
  PReLUMicrokernelTester().rows(2).channels(16).Test(
      xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16);
}
3610 
// 2 rows; channel counts 32, 48, ..., 144 (multiples of 16 below 160).
TEST(F32_PRELU__WASMSIMD_MINMAX_2X16, channels_div_16) {
  for (size_t channel_count = 32; channel_count < 160; channel_count += 16) {
    PReLUMicrokernelTester().rows(2).channels(channel_count).Test(
        xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16);
  }
}
3619 
// 2 rows; every channel count from 1 through 15.
TEST(F32_PRELU__WASMSIMD_MINMAX_2X16, channels_lt_16) {
  for (size_t channel_count = 1; channel_count < 16; ++channel_count) {
    PReLUMicrokernelTester().rows(2).channels(channel_count).Test(
        xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16);
  }
}
3628 
// 2 rows; every channel count from 17 through 31.
TEST(F32_PRELU__WASMSIMD_MINMAX_2X16, channels_gt_16) {
  for (size_t channel_count = 17; channel_count < 32; ++channel_count) {
    PReLUMicrokernelTester().rows(2).channels(channel_count).Test(
        xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16);
  }
}
3637 
// Row count 1 (the loop's only value) crossed with channel counts 1, 16, ..., 76.
TEST(F32_PRELU__WASMSIMD_MINMAX_2X16, rows_lt_2) {
  for (size_t row_count = 1; row_count < 2; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).Test(
          xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16);
    }
  }
}
3648 
// Row counts 4, 6, 8 crossed with channel counts 1, 16, ..., 76.
TEST(F32_PRELU__WASMSIMD_MINMAX_2X16, rows_div_2) {
  for (size_t row_count = 4; row_count <= 8; row_count += 2) {
    for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).Test(
          xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16);
    }
  }
}
3659 
// Row count 3 (the loop's only value) crossed with channel counts 1, 16, ..., 76.
TEST(F32_PRELU__WASMSIMD_MINMAX_2X16, rows_gt_2) {
  for (size_t row_count = 3; row_count < 4; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).Test(
          xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16);
    }
  }
}
3670 
// Row counts 1..6 crossed with channel counts 1, 16, ..., 76, using
// input_stride(83) and iterations(1).
TEST(F32_PRELU__WASMSIMD_MINMAX_2X16, input_stride) {
  for (size_t row_count = 1; row_count <= 6; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
      PReLUMicrokernelTester()
          .rows(row_count).channels(channel_count)
          .input_stride(83).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16);
    }
  }
}
3683 
// Row counts 1..6 crossed with channel counts 1, 16, ..., 76, using
// output_stride(83) and iterations(1).
TEST(F32_PRELU__WASMSIMD_MINMAX_2X16, output_stride) {
  for (size_t row_count = 1; row_count <= 6; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
      PReLUMicrokernelTester()
          .rows(row_count).channels(channel_count)
          .output_stride(83).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16);
    }
  }
}
3696 
// Row counts 1..6 crossed with channel counts 1, 16, ..., 76, using
// inplace(true) and iterations(1).
TEST(F32_PRELU__WASMSIMD_MINMAX_2X16, inplace) {
  for (size_t row_count = 1; row_count <= 6; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
      PReLUMicrokernelTester()
          .rows(row_count).channels(channel_count)
          .inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16);
    }
  }
}
3709 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3710 
3711 
3712 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with 4 rows and 4 channels.
TEST(F32_PRELU__WASMSIMD_MINMAX_4X4, channels_eq_4) {
  PReLUMicrokernelTester().rows(4).channels(4).Test(
      xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4);
}
3719 
// 4 rows; channel counts 8, 12, ..., 36 (multiples of 4 below 40).
TEST(F32_PRELU__WASMSIMD_MINMAX_4X4, channels_div_4) {
  for (size_t channel_count = 8; channel_count < 40; channel_count += 4) {
    PReLUMicrokernelTester().rows(4).channels(channel_count).Test(
        xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4);
  }
}
3728 
// 4 rows; every channel count from 1 through 3.
TEST(F32_PRELU__WASMSIMD_MINMAX_4X4, channels_lt_4) {
  for (size_t channel_count = 1; channel_count < 4; ++channel_count) {
    PReLUMicrokernelTester().rows(4).channels(channel_count).Test(
        xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4);
  }
}
3737 
// 4 rows; every channel count from 5 through 7.
TEST(F32_PRELU__WASMSIMD_MINMAX_4X4, channels_gt_4) {
  for (size_t channel_count = 5; channel_count < 8; ++channel_count) {
    PReLUMicrokernelTester().rows(4).channels(channel_count).Test(
        xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4);
  }
}
3746 
// Row counts 1..3 crossed with channel counts 1, 4, ..., 19.
TEST(F32_PRELU__WASMSIMD_MINMAX_4X4, rows_lt_4) {
  for (size_t row_count = 1; row_count < 4; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).Test(
          xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4);
    }
  }
}
3757 
// Row counts 8, 12, 16 crossed with channel counts 1, 4, ..., 19.
TEST(F32_PRELU__WASMSIMD_MINMAX_4X4, rows_div_4) {
  for (size_t row_count = 8; row_count <= 16; row_count += 4) {
    for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).Test(
          xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4);
    }
  }
}
3768 
// Row counts 5..7 crossed with channel counts 1, 4, ..., 19.
TEST(F32_PRELU__WASMSIMD_MINMAX_4X4, rows_gt_4) {
  for (size_t row_count = 5; row_count < 8; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).Test(
          xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4);
    }
  }
}
3779 
// Row counts 1, 4, 7, 10 crossed with channel counts 1, 4, ..., 19, using
// input_stride(23) and iterations(1).
TEST(F32_PRELU__WASMSIMD_MINMAX_4X4, input_stride) {
  for (size_t row_count = 1; row_count <= 12; row_count += 3) {
    for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
      PReLUMicrokernelTester()
          .rows(row_count).channels(channel_count)
          .input_stride(23).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4);
    }
  }
}
3792 
// Row counts 1, 4, 7, 10 crossed with channel counts 1, 4, ..., 19, using
// output_stride(23) and iterations(1).
TEST(F32_PRELU__WASMSIMD_MINMAX_4X4, output_stride) {
  for (size_t row_count = 1; row_count <= 12; row_count += 3) {
    for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
      PReLUMicrokernelTester()
          .rows(row_count).channels(channel_count)
          .output_stride(23).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4);
    }
  }
}
3805 
// Row counts 1, 4, 7, 10 crossed with channel counts 1, 4, ..., 19, using
// inplace(true) and iterations(1).
TEST(F32_PRELU__WASMSIMD_MINMAX_4X4, inplace) {
  for (size_t row_count = 1; row_count <= 12; row_count += 3) {
    for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
      PReLUMicrokernelTester()
          .rows(row_count).channels(channel_count)
          .inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4);
    }
  }
}
3818 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3819 
3820 
3821 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with 4 rows and 8 channels.
TEST(F32_PRELU__WASMSIMD_MINMAX_4X8, channels_eq_8) {
  PReLUMicrokernelTester().rows(4).channels(8).Test(
      xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8);
}
3828 
// 4 rows; channel counts 16, 24, ..., 72 (multiples of 8 below 80).
TEST(F32_PRELU__WASMSIMD_MINMAX_4X8, channels_div_8) {
  for (size_t channel_count = 16; channel_count < 80; channel_count += 8) {
    PReLUMicrokernelTester().rows(4).channels(channel_count).Test(
        xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8);
  }
}
3837 
// 4 rows; every channel count from 1 through 7.
TEST(F32_PRELU__WASMSIMD_MINMAX_4X8, channels_lt_8) {
  for (size_t channel_count = 1; channel_count < 8; ++channel_count) {
    PReLUMicrokernelTester().rows(4).channels(channel_count).Test(
        xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8);
  }
}
3846 
// 4 rows; every channel count from 9 through 15.
TEST(F32_PRELU__WASMSIMD_MINMAX_4X8, channels_gt_8) {
  for (size_t channel_count = 9; channel_count < 16; ++channel_count) {
    PReLUMicrokernelTester().rows(4).channels(channel_count).Test(
        xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8);
  }
}
3855 
// Row counts 1..3 crossed with channel counts 1, 8, ..., 36.
TEST(F32_PRELU__WASMSIMD_MINMAX_4X8, rows_lt_4) {
  for (size_t row_count = 1; row_count < 4; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 40; channel_count += 7) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).Test(
          xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8);
    }
  }
}
3866 
// Row counts 8, 12, 16 crossed with channel counts 1, 8, ..., 36.
TEST(F32_PRELU__WASMSIMD_MINMAX_4X8, rows_div_4) {
  for (size_t row_count = 8; row_count <= 16; row_count += 4) {
    for (size_t channel_count = 1; channel_count <= 40; channel_count += 7) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).Test(
          xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8);
    }
  }
}
3877 
// Row counts 5..7 crossed with channel counts 1, 8, ..., 36.
TEST(F32_PRELU__WASMSIMD_MINMAX_4X8, rows_gt_4) {
  for (size_t row_count = 5; row_count < 8; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 40; channel_count += 7) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).Test(
          xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8);
    }
  }
}
3888 
// Row counts 1, 4, 7, 10 crossed with channel counts 1, 8, ..., 36, using
// input_stride(43) and iterations(1).
TEST(F32_PRELU__WASMSIMD_MINMAX_4X8, input_stride) {
  for (size_t row_count = 1; row_count <= 12; row_count += 3) {
    for (size_t channel_count = 1; channel_count <= 40; channel_count += 7) {
      PReLUMicrokernelTester()
          .rows(row_count).channels(channel_count)
          .input_stride(43).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8);
    }
  }
}
3901 
// Row counts 1, 4, 7, 10 crossed with channel counts 1, 8, ..., 36, using
// output_stride(43) and iterations(1).
TEST(F32_PRELU__WASMSIMD_MINMAX_4X8, output_stride) {
  for (size_t row_count = 1; row_count <= 12; row_count += 3) {
    for (size_t channel_count = 1; channel_count <= 40; channel_count += 7) {
      PReLUMicrokernelTester()
          .rows(row_count).channels(channel_count)
          .output_stride(43).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8);
    }
  }
}
3914 
// Row counts 1, 4, 7, 10 crossed with channel counts 1, 8, ..., 36, using
// inplace(true) and iterations(1).
TEST(F32_PRELU__WASMSIMD_MINMAX_4X8, inplace) {
  for (size_t row_count = 1; row_count <= 12; row_count += 3) {
    for (size_t channel_count = 1; channel_count <= 40; channel_count += 7) {
      PReLUMicrokernelTester()
          .rows(row_count).channels(channel_count)
          .inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8);
    }
  }
}
3927 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3928 
3929 
3930 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with 4 rows and 16 channels.
TEST(F32_PRELU__WASMSIMD_MINMAX_4X16, channels_eq_16) {
  PReLUMicrokernelTester().rows(4).channels(16).Test(
      xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16);
}
3937 
// 4 rows; channel counts 32, 48, ..., 144 (multiples of 16 below 160).
TEST(F32_PRELU__WASMSIMD_MINMAX_4X16, channels_div_16) {
  for (size_t channel_count = 32; channel_count < 160; channel_count += 16) {
    PReLUMicrokernelTester().rows(4).channels(channel_count).Test(
        xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16);
  }
}
3946 
// 4 rows; every channel count from 1 through 15.
TEST(F32_PRELU__WASMSIMD_MINMAX_4X16, channels_lt_16) {
  for (size_t channel_count = 1; channel_count < 16; ++channel_count) {
    PReLUMicrokernelTester().rows(4).channels(channel_count).Test(
        xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16);
  }
}
3955 
// 4 rows; every channel count from 17 through 31.
TEST(F32_PRELU__WASMSIMD_MINMAX_4X16, channels_gt_16) {
  for (size_t channel_count = 17; channel_count < 32; ++channel_count) {
    PReLUMicrokernelTester().rows(4).channels(channel_count).Test(
        xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16);
  }
}
3964 
// Row counts 1..3 crossed with channel counts 1, 16, ..., 76.
TEST(F32_PRELU__WASMSIMD_MINMAX_4X16, rows_lt_4) {
  for (size_t row_count = 1; row_count < 4; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).Test(
          xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16);
    }
  }
}
3975 
// Row counts 8, 12, 16 crossed with channel counts 1, 16, ..., 76.
TEST(F32_PRELU__WASMSIMD_MINMAX_4X16, rows_div_4) {
  for (size_t row_count = 8; row_count <= 16; row_count += 4) {
    for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).Test(
          xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16);
    }
  }
}
3986 
// Row counts 5..7 crossed with channel counts 1, 16, ..., 76.
TEST(F32_PRELU__WASMSIMD_MINMAX_4X16, rows_gt_4) {
  for (size_t row_count = 5; row_count < 8; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).Test(
          xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16);
    }
  }
}
3997 
// Row counts 1, 4, 7, 10 crossed with channel counts 1, 16, ..., 76, using
// input_stride(83) and iterations(1).
TEST(F32_PRELU__WASMSIMD_MINMAX_4X16, input_stride) {
  for (size_t row_count = 1; row_count <= 12; row_count += 3) {
    for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
      PReLUMicrokernelTester()
          .rows(row_count).channels(channel_count)
          .input_stride(83).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16);
    }
  }
}
4010 
// Row counts 1, 4, 7, 10 crossed with channel counts 1, 16, ..., 76, using
// output_stride(83) and iterations(1).
TEST(F32_PRELU__WASMSIMD_MINMAX_4X16, output_stride) {
  for (size_t row_count = 1; row_count <= 12; row_count += 3) {
    for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
      PReLUMicrokernelTester()
          .rows(row_count).channels(channel_count)
          .output_stride(83).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16);
    }
  }
}
4023 
// Row counts 1, 4, 7, 10 crossed with channel counts 1, 16, ..., 76, using
// inplace(true) and iterations(1).
TEST(F32_PRELU__WASMSIMD_MINMAX_4X16, inplace) {
  for (size_t row_count = 1; row_count <= 12; row_count += 3) {
    for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
      PReLUMicrokernelTester()
          .rows(row_count).channels(channel_count)
          .inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16);
    }
  }
}
4036 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
4037 
4038 
4039 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with 2 rows and 1 channel.
TEST(F32_PRELU__WASM_2X1, channels_eq_1) {
  PReLUMicrokernelTester().rows(2).channels(1).Test(
      xnn_f32_prelu_ukernel__wasm_2x1);
}
4046 
// 2 rows; every channel count from 2 through 9.
TEST(F32_PRELU__WASM_2X1, channels_gt_1) {
  for (size_t channel_count = 2; channel_count < 10; ++channel_count) {
    PReLUMicrokernelTester().rows(2).channels(channel_count).Test(
        xnn_f32_prelu_ukernel__wasm_2x1);
  }
}
4055 
// Row count 1 (the loop's only value) crossed with channel counts 1..5.
TEST(F32_PRELU__WASM_2X1, rows_lt_2) {
  for (size_t row_count = 1; row_count < 2; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 5; ++channel_count) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).Test(
          xnn_f32_prelu_ukernel__wasm_2x1);
    }
  }
}
4066 
// Row counts 4, 6, 8 crossed with channel counts 1..5.
TEST(F32_PRELU__WASM_2X1, rows_div_2) {
  for (size_t row_count = 4; row_count <= 8; row_count += 2) {
    for (size_t channel_count = 1; channel_count <= 5; ++channel_count) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).Test(
          xnn_f32_prelu_ukernel__wasm_2x1);
    }
  }
}
4077 
// Row count 3 (the loop's only value) crossed with channel counts 1..5.
TEST(F32_PRELU__WASM_2X1, rows_gt_2) {
  for (size_t row_count = 3; row_count < 4; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 5; ++channel_count) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).Test(
          xnn_f32_prelu_ukernel__wasm_2x1);
    }
  }
}
4088 
// Row counts 1..6 crossed with channel counts 1..5, using input_stride(7)
// and iterations(1).
TEST(F32_PRELU__WASM_2X1, input_stride) {
  for (size_t row_count = 1; row_count <= 6; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 5; ++channel_count) {
      PReLUMicrokernelTester()
          .rows(row_count).channels(channel_count)
          .input_stride(7).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasm_2x1);
    }
  }
}
4101 
// Row counts 1..6 crossed with channel counts 1..5, using output_stride(7)
// and iterations(1).
TEST(F32_PRELU__WASM_2X1, output_stride) {
  for (size_t row_count = 1; row_count <= 6; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 5; ++channel_count) {
      PReLUMicrokernelTester()
          .rows(row_count).channels(channel_count)
          .output_stride(7).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasm_2x1);
    }
  }
}
4114 
// Row counts 1..6 crossed with channel counts 1..5, using inplace(true)
// and iterations(1).
TEST(F32_PRELU__WASM_2X1, inplace) {
  for (size_t row_count = 1; row_count <= 6; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 5; ++channel_count) {
      PReLUMicrokernelTester()
          .rows(row_count).channels(channel_count)
          .inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasm_2x1);
    }
  }
}
4127 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
4128 
4129 
4130 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with 2 rows and 4 channels.
TEST(F32_PRELU__WASM_2X4, channels_eq_4) {
  PReLUMicrokernelTester().rows(2).channels(4).Test(
      xnn_f32_prelu_ukernel__wasm_2x4);
}
4137 
// 2 rows; channel counts 8, 12, ..., 36 (multiples of 4 below 40).
TEST(F32_PRELU__WASM_2X4, channels_div_4) {
  for (size_t channel_count = 8; channel_count < 40; channel_count += 4) {
    PReLUMicrokernelTester().rows(2).channels(channel_count).Test(
        xnn_f32_prelu_ukernel__wasm_2x4);
  }
}
4146 
// 2 rows; every channel count from 1 through 3.
TEST(F32_PRELU__WASM_2X4, channels_lt_4) {
  for (size_t channel_count = 1; channel_count < 4; ++channel_count) {
    PReLUMicrokernelTester().rows(2).channels(channel_count).Test(
        xnn_f32_prelu_ukernel__wasm_2x4);
  }
}
4155 
// 2 rows; every channel count from 5 through 7.
TEST(F32_PRELU__WASM_2X4, channels_gt_4) {
  for (size_t channel_count = 5; channel_count < 8; ++channel_count) {
    PReLUMicrokernelTester().rows(2).channels(channel_count).Test(
        xnn_f32_prelu_ukernel__wasm_2x4);
  }
}
4164 
// Row count 1 (the loop's only value) crossed with channel counts 1, 4, ..., 19.
TEST(F32_PRELU__WASM_2X4, rows_lt_2) {
  for (size_t row_count = 1; row_count < 2; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).Test(
          xnn_f32_prelu_ukernel__wasm_2x4);
    }
  }
}
4175 
// Row counts 4, 6, 8 crossed with channel counts 1, 4, ..., 19.
TEST(F32_PRELU__WASM_2X4, rows_div_2) {
  for (size_t row_count = 4; row_count <= 8; row_count += 2) {
    for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).Test(
          xnn_f32_prelu_ukernel__wasm_2x4);
    }
  }
}
4186 
// Row count 3 (the loop's only value) crossed with channel counts 1, 4, ..., 19.
TEST(F32_PRELU__WASM_2X4, rows_gt_2) {
  for (size_t row_count = 3; row_count < 4; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).Test(
          xnn_f32_prelu_ukernel__wasm_2x4);
    }
  }
}
4197 
// Row counts 1..6 crossed with channel counts 1, 4, ..., 19, using
// input_stride(23) and iterations(1).
TEST(F32_PRELU__WASM_2X4, input_stride) {
  for (size_t row_count = 1; row_count <= 6; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
      PReLUMicrokernelTester()
          .rows(row_count).channels(channel_count)
          .input_stride(23).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasm_2x4);
    }
  }
}
4210 
// Row counts 1..6 crossed with channel counts 1, 4, ..., 19, using
// output_stride(23) and iterations(1).
TEST(F32_PRELU__WASM_2X4, output_stride) {
  for (size_t row_count = 1; row_count <= 6; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
      PReLUMicrokernelTester()
          .rows(row_count).channels(channel_count)
          .output_stride(23).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasm_2x4);
    }
  }
}
4223 
// Row counts 1..6 crossed with channel counts 1, 4, ..., 19, using
// inplace(true) and iterations(1).
TEST(F32_PRELU__WASM_2X4, inplace) {
  for (size_t row_count = 1; row_count <= 6; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
      PReLUMicrokernelTester()
          .rows(row_count).channels(channel_count)
          .inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasm_2x4);
    }
  }
}
4236 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
4237 
4238 
// Single run with 2 rows and 1 channel.
TEST(F32_PRELU__SCALAR_2X1, channels_eq_1) {
  PReLUMicrokernelTester().rows(2).channels(1).Test(
      xnn_f32_prelu_ukernel__scalar_2x1);
}
4245 
// 2 rows; every channel count from 2 through 9.
TEST(F32_PRELU__SCALAR_2X1, channels_gt_1) {
  for (size_t channel_count = 2; channel_count < 10; ++channel_count) {
    PReLUMicrokernelTester().rows(2).channels(channel_count).Test(
        xnn_f32_prelu_ukernel__scalar_2x1);
  }
}
4254 
// Row count 1 (the loop's only value) crossed with channel counts 1..5.
TEST(F32_PRELU__SCALAR_2X1, rows_lt_2) {
  for (size_t row_count = 1; row_count < 2; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 5; ++channel_count) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).Test(
          xnn_f32_prelu_ukernel__scalar_2x1);
    }
  }
}
4265 
// Row counts 4, 6, 8 crossed with channel counts 1..5.
TEST(F32_PRELU__SCALAR_2X1, rows_div_2) {
  for (size_t row_count = 4; row_count <= 8; row_count += 2) {
    for (size_t channel_count = 1; channel_count <= 5; ++channel_count) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).Test(
          xnn_f32_prelu_ukernel__scalar_2x1);
    }
  }
}
4276 
// Row count 3 (the loop's only value) crossed with channel counts 1..5.
TEST(F32_PRELU__SCALAR_2X1, rows_gt_2) {
  for (size_t row_count = 3; row_count < 4; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 5; ++channel_count) {
      PReLUMicrokernelTester().rows(row_count).channels(channel_count).Test(
          xnn_f32_prelu_ukernel__scalar_2x1);
    }
  }
}
4287 
// Row counts 1..6 crossed with channel counts 1..5, using input_stride(7)
// and iterations(1).
TEST(F32_PRELU__SCALAR_2X1, input_stride) {
  for (size_t row_count = 1; row_count <= 6; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 5; ++channel_count) {
      PReLUMicrokernelTester()
          .rows(row_count).channels(channel_count)
          .input_stride(7).iterations(1)
          .Test(xnn_f32_prelu_ukernel__scalar_2x1);
    }
  }
}
4300 
// Row counts 1..6 crossed with channel counts 1..5, using output_stride(7)
// and iterations(1).
TEST(F32_PRELU__SCALAR_2X1, output_stride) {
  for (size_t row_count = 1; row_count <= 6; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 5; ++channel_count) {
      PReLUMicrokernelTester()
          .rows(row_count).channels(channel_count)
          .output_stride(7).iterations(1)
          .Test(xnn_f32_prelu_ukernel__scalar_2x1);
    }
  }
}
4313 
// Row counts 1..6 crossed with channel counts 1..5, using inplace(true)
// and iterations(1).
TEST(F32_PRELU__SCALAR_2X1, inplace) {
  for (size_t row_count = 1; row_count <= 6; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 5; ++channel_count) {
      PReLUMicrokernelTester()
          .rows(row_count).channels(channel_count)
          .inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__scalar_2x1);
    }
  }
}
4326 
// Single configuration: xnn_f32_prelu_ukernel__scalar_2x4 is run through
// PReLUMicrokernelTester with exactly 2 rows and 4 channels.
TEST(F32_PRELU__SCALAR_2X4, channels_eq_4) {
  PReLUMicrokernelTester().rows(2).channels(4).Test(xnn_f32_prelu_ukernel__scalar_2x4);
}
4333 
// Runs xnn_f32_prelu_ukernel__scalar_2x4 through PReLUMicrokernelTester with
// 2 rows and channel counts 8, 12, ..., 36 (multiples of 4 below 40).
TEST(F32_PRELU__SCALAR_2X4, channels_div_4) {
  for (size_t channel_count = 8; channel_count < 40; channel_count += 4) {
    PReLUMicrokernelTester()
      .rows(2)
      .channels(channel_count)
      .Test(xnn_f32_prelu_ukernel__scalar_2x4);
  }
}
4342 
// Runs xnn_f32_prelu_ukernel__scalar_2x4 through PReLUMicrokernelTester with
// 2 rows and channel counts 1, 2 and 3 (all below 4).
TEST(F32_PRELU__SCALAR_2X4, channels_lt_4) {
  for (size_t channel_count = 1; channel_count < 4; ++channel_count) {
    PReLUMicrokernelTester()
      .rows(2)
      .channels(channel_count)
      .Test(xnn_f32_prelu_ukernel__scalar_2x4);
  }
}
4351 
// Runs xnn_f32_prelu_ukernel__scalar_2x4 through PReLUMicrokernelTester with
// 2 rows and channel counts 5, 6 and 7 (above 4, below 8).
TEST(F32_PRELU__SCALAR_2X4, channels_gt_4) {
  for (size_t channel_count = 5; channel_count < 8; ++channel_count) {
    PReLUMicrokernelTester()
      .rows(2)
      .channels(channel_count)
      .Test(xnn_f32_prelu_ukernel__scalar_2x4);
  }
}
4360 
// Runs xnn_f32_prelu_ukernel__scalar_2x4 through PReLUMicrokernelTester with
// fewer than 2 rows. The outer loop bounds (1 <= rows < 2) yield exactly one
// row count, 1, which is combined with channel counts 1, 4, 7, ..., 19.
TEST(F32_PRELU__SCALAR_2X4, rows_lt_2) {
  for (size_t row_count = 1; row_count < 2; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
      PReLUMicrokernelTester()
        .rows(row_count)
        .channels(channel_count)
        .Test(xnn_f32_prelu_ukernel__scalar_2x4);
    }
  }
}
4371 
// Runs xnn_f32_prelu_ukernel__scalar_2x4 through PReLUMicrokernelTester for
// row counts 4, 6 and 8 (all multiples of 2), each combined with channel
// counts 1, 4, 7, ..., 19.
TEST(F32_PRELU__SCALAR_2X4, rows_div_2) {
  for (size_t row_count = 4; row_count <= 8; row_count += 2) {
    for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
      PReLUMicrokernelTester()
        .rows(row_count)
        .channels(channel_count)
        .Test(xnn_f32_prelu_ukernel__scalar_2x4);
    }
  }
}
4382 
// Runs xnn_f32_prelu_ukernel__scalar_2x4 through PReLUMicrokernelTester with
// more than 2 rows. The outer loop bounds (3 <= rows < 4) yield exactly one
// row count, 3, which is combined with channel counts 1, 4, 7, ..., 19.
TEST(F32_PRELU__SCALAR_2X4, rows_gt_2) {
  for (size_t row_count = 3; row_count < 4; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
      PReLUMicrokernelTester()
        .rows(row_count)
        .channels(channel_count)
        .Test(xnn_f32_prelu_ukernel__scalar_2x4);
    }
  }
}
4393 
// Runs xnn_f32_prelu_ukernel__scalar_2x4 through PReLUMicrokernelTester with
// the tester's input stride set to 23 (larger than every channel count used).
// Covers row counts 1 through 6 and channel counts 1, 4, 7, ..., 19, with the
// tester's iterations setting at 1.
TEST(F32_PRELU__SCALAR_2X4, input_stride) {
  for (size_t row_count = 1; row_count <= 6; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
      PReLUMicrokernelTester()
        .rows(row_count)
        .channels(channel_count)
        .input_stride(23)
        .iterations(1)
        .Test(xnn_f32_prelu_ukernel__scalar_2x4);
    }
  }
}
4406 
// Runs xnn_f32_prelu_ukernel__scalar_2x4 through PReLUMicrokernelTester with
// the tester's output stride set to 23 (larger than every channel count used).
// Covers row counts 1 through 6 and channel counts 1, 4, 7, ..., 19, with the
// tester's iterations setting at 1.
TEST(F32_PRELU__SCALAR_2X4, output_stride) {
  for (size_t row_count = 1; row_count <= 6; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
      PReLUMicrokernelTester()
        .rows(row_count)
        .channels(channel_count)
        .output_stride(23)
        .iterations(1)
        .Test(xnn_f32_prelu_ukernel__scalar_2x4);
    }
  }
}
4419 
// Runs xnn_f32_prelu_ukernel__scalar_2x4 through PReLUMicrokernelTester with
// the tester's inplace flag enabled. Covers row counts 1 through 6 and
// channel counts 1, 4, 7, ..., 19, with the tester's iterations setting at 1.
TEST(F32_PRELU__SCALAR_2X4, inplace) {
  for (size_t row_count = 1; row_count <= 6; ++row_count) {
    for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
      PReLUMicrokernelTester()
        .rows(row_count)
        .channels(channel_count)
        .inplace(true)
        .iterations(1)
        .Test(xnn_f32_prelu_ukernel__scalar_2x4);
    }
  }
}