// xref: /aosp_15_r20/external/XNNPACK/test/f16-prelu.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/f16-prelu.yaml
//   Generator: tools/generate-prelu-test.py
9 
10 
#include <cstddef>

#include <gtest/gtest.h>

#include <xnnpack/common.h>
#include <xnnpack/isa-checks.h>

#include <xnnpack/prelu.h>
#include "prelu-microkernel-tester.h"
18 
19 
#if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
  // Tests for xnn_f16_prelu_ukernel__neonfp16arith_2x8, driven through
  // PReLUMicrokernelTester. Each test starts with
  // TEST_REQUIRES_ARM_NEON_FP16_ARITH, which is defined outside this file
  // (NOTE(review): presumably skips the test when the CPU lacks NEON FP16
  // arithmetic -- confirm against xnnpack/isa-checks.h).

  // 2 rows, exactly 8 channels.
  TEST(F16_PRELU__NEONFP16ARITH_2X8, channels_eq_8) {
    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
    PReLUMicrokernelTester()
      .rows(2)
      .channels(8)
      .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x8);
  }

  // 2 rows; channel counts 16, 24, ..., 72 (multiples of 8).
  TEST(F16_PRELU__NEONFP16ARITH_2X8, channels_div_8) {
    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
    for (size_t channels = 16; channels < 80; channels += 8) {
      PReLUMicrokernelTester()
        .rows(2)
        .channels(channels)
        .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x8);
    }
  }

  // 2 rows; channel counts 1 through 7.
  TEST(F16_PRELU__NEONFP16ARITH_2X8, channels_lt_8) {
    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
    for (size_t channels = 1; channels < 8; channels++) {
      PReLUMicrokernelTester()
        .rows(2)
        .channels(channels)
        .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x8);
    }
  }

  // 2 rows; channel counts 9 through 15.
  TEST(F16_PRELU__NEONFP16ARITH_2X8, channels_gt_8) {
    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
    for (size_t channels = 9; channels < 16; channels++) {
      PReLUMicrokernelTester()
        .rows(2)
        .channels(channels)
        .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x8);
    }
  }

  // A single row (the loop only visits rows == 1); channel counts
  // 1, 8, 15, 22, 29, 36.
  TEST(F16_PRELU__NEONFP16ARITH_2X8, rows_lt_2) {
    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
    for (size_t rows = 1; rows < 2; rows++) {
      for (size_t channels = 1; channels <= 40; channels += 7) {
        PReLUMicrokernelTester()
          .rows(rows)
          .channels(channels)
          .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x8);
      }
    }
  }

  // Row counts 4, 6, 8; channel counts 1, 8, 15, 22, 29, 36.
  TEST(F16_PRELU__NEONFP16ARITH_2X8, rows_div_2) {
    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
    for (size_t rows = 4; rows <= 8; rows += 2) {
      for (size_t channels = 1; channels <= 40; channels += 7) {
        PReLUMicrokernelTester()
          .rows(rows)
          .channels(channels)
          .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x8);
      }
    }
  }

  // Three rows (the loop only visits rows == 3); channel counts
  // 1, 8, 15, 22, 29, 36.
  TEST(F16_PRELU__NEONFP16ARITH_2X8, rows_gt_2) {
    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
    for (size_t rows = 3; rows < 4; rows++) {
      for (size_t channels = 1; channels <= 40; channels += 7) {
        PReLUMicrokernelTester()
          .rows(rows)
          .channels(channels)
          .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x8);
      }
    }
  }

  // Row counts 1 through 6 with input_stride(43), which exceeds the largest
  // channel count used here (36); the tester is set to iterations(1).
  TEST(F16_PRELU__NEONFP16ARITH_2X8, input_stride) {
    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
    for (size_t rows = 1; rows <= 6; rows += 1) {
      for (size_t channels = 1; channels <= 40; channels += 7) {
        PReLUMicrokernelTester()
          .rows(rows)
          .channels(channels)
          .input_stride(43)
          .iterations(1)
          .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x8);
      }
    }
  }

  // Row counts 1 through 6 with output_stride(43), which exceeds the largest
  // channel count used here (36); the tester is set to iterations(1).
  TEST(F16_PRELU__NEONFP16ARITH_2X8, output_stride) {
    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
    for (size_t rows = 1; rows <= 6; rows += 1) {
      for (size_t channels = 1; channels <= 40; channels += 7) {
        PReLUMicrokernelTester()
          .rows(rows)
          .channels(channels)
          .output_stride(43)
          .iterations(1)
          .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x8);
      }
    }
  }

  // Row counts 1 through 6 with inplace(true); the tester is set to
  // iterations(1).
  TEST(F16_PRELU__NEONFP16ARITH_2X8, inplace) {
    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
    for (size_t rows = 1; rows <= 6; rows += 1) {
      for (size_t channels = 1; channels <= 40; channels += 7) {
        PReLUMicrokernelTester()
          .rows(rows)
          .channels(channels)
          .inplace(true)
          .iterations(1)
          .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x8);
      }
    }
  }
#endif  // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
137 
138 
#if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
  // Tests for xnn_f16_prelu_ukernel__neonfp16arith_2x16, driven through
  // PReLUMicrokernelTester. Each test starts with
  // TEST_REQUIRES_ARM_NEON_FP16_ARITH, which is defined outside this file
  // (NOTE(review): presumably skips the test when the CPU lacks NEON FP16
  // arithmetic -- confirm against xnnpack/isa-checks.h).

  // 2 rows, exactly 16 channels.
  TEST(F16_PRELU__NEONFP16ARITH_2X16, channels_eq_16) {
    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
    PReLUMicrokernelTester()
      .rows(2)
      .channels(16)
      .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x16);
  }

  // 2 rows; channel counts 32, 48, ..., 144 (multiples of 16).
  TEST(F16_PRELU__NEONFP16ARITH_2X16, channels_div_16) {
    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
    for (size_t channels = 32; channels < 160; channels += 16) {
      PReLUMicrokernelTester()
        .rows(2)
        .channels(channels)
        .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x16);
    }
  }

  // 2 rows; channel counts 1 through 15.
  TEST(F16_PRELU__NEONFP16ARITH_2X16, channels_lt_16) {
    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
    for (size_t channels = 1; channels < 16; channels++) {
      PReLUMicrokernelTester()
        .rows(2)
        .channels(channels)
        .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x16);
    }
  }

  // 2 rows; channel counts 17 through 31.
  TEST(F16_PRELU__NEONFP16ARITH_2X16, channels_gt_16) {
    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
    for (size_t channels = 17; channels < 32; channels++) {
      PReLUMicrokernelTester()
        .rows(2)
        .channels(channels)
        .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x16);
    }
  }

  // A single row (the loop only visits rows == 1); channel counts
  // 1, 16, 31, 46, 61, 76.
  TEST(F16_PRELU__NEONFP16ARITH_2X16, rows_lt_2) {
    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
    for (size_t rows = 1; rows < 2; rows++) {
      for (size_t channels = 1; channels <= 80; channels += 15) {
        PReLUMicrokernelTester()
          .rows(rows)
          .channels(channels)
          .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x16);
      }
    }
  }

  // Row counts 4, 6, 8; channel counts 1, 16, 31, 46, 61, 76.
  TEST(F16_PRELU__NEONFP16ARITH_2X16, rows_div_2) {
    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
    for (size_t rows = 4; rows <= 8; rows += 2) {
      for (size_t channels = 1; channels <= 80; channels += 15) {
        PReLUMicrokernelTester()
          .rows(rows)
          .channels(channels)
          .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x16);
      }
    }
  }

  // Three rows (the loop only visits rows == 3); channel counts
  // 1, 16, 31, 46, 61, 76.
  TEST(F16_PRELU__NEONFP16ARITH_2X16, rows_gt_2) {
    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
    for (size_t rows = 3; rows < 4; rows++) {
      for (size_t channels = 1; channels <= 80; channels += 15) {
        PReLUMicrokernelTester()
          .rows(rows)
          .channels(channels)
          .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x16);
      }
    }
  }

  // Row counts 1 through 6 with input_stride(83), which exceeds the largest
  // channel count used here (76); the tester is set to iterations(1).
  TEST(F16_PRELU__NEONFP16ARITH_2X16, input_stride) {
    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
    for (size_t rows = 1; rows <= 6; rows += 1) {
      for (size_t channels = 1; channels <= 80; channels += 15) {
        PReLUMicrokernelTester()
          .rows(rows)
          .channels(channels)
          .input_stride(83)
          .iterations(1)
          .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x16);
      }
    }
  }

  // Row counts 1 through 6 with output_stride(83), which exceeds the largest
  // channel count used here (76); the tester is set to iterations(1).
  TEST(F16_PRELU__NEONFP16ARITH_2X16, output_stride) {
    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
    for (size_t rows = 1; rows <= 6; rows += 1) {
      for (size_t channels = 1; channels <= 80; channels += 15) {
        PReLUMicrokernelTester()
          .rows(rows)
          .channels(channels)
          .output_stride(83)
          .iterations(1)
          .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x16);
      }
    }
  }

  // Row counts 1 through 6 with inplace(true); the tester is set to
  // iterations(1).
  TEST(F16_PRELU__NEONFP16ARITH_2X16, inplace) {
    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
    for (size_t rows = 1; rows <= 6; rows += 1) {
      for (size_t channels = 1; channels <= 80; channels += 15) {
        PReLUMicrokernelTester()
          .rows(rows)
          .channels(channels)
          .inplace(true)
          .iterations(1)
          .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x16);
      }
    }
  }
#endif  // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
256 
257 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Tests for xnn_f16_prelu_ukernel__f16c_2x8, driven through
  // PReLUMicrokernelTester. Each test starts with TEST_REQUIRES_X86_F16C,
  // which is defined outside this file (NOTE(review): presumably skips the
  // test when the CPU lacks F16C -- confirm against xnnpack/isa-checks.h).

  // 2 rows, exactly 8 channels.
  TEST(F16_PRELU__F16C_2X8, channels_eq_8) {
    TEST_REQUIRES_X86_F16C;
    PReLUMicrokernelTester()
      .rows(2)
      .channels(8)
      .Test(xnn_f16_prelu_ukernel__f16c_2x8);
  }

  // 2 rows; channel counts 16, 24, ..., 72 (multiples of 8).
  TEST(F16_PRELU__F16C_2X8, channels_div_8) {
    TEST_REQUIRES_X86_F16C;
    for (size_t channels = 16; channels < 80; channels += 8) {
      PReLUMicrokernelTester()
        .rows(2)
        .channels(channels)
        .Test(xnn_f16_prelu_ukernel__f16c_2x8);
    }
  }

  // 2 rows; channel counts 1 through 7.
  TEST(F16_PRELU__F16C_2X8, channels_lt_8) {
    TEST_REQUIRES_X86_F16C;
    for (size_t channels = 1; channels < 8; channels++) {
      PReLUMicrokernelTester()
        .rows(2)
        .channels(channels)
        .Test(xnn_f16_prelu_ukernel__f16c_2x8);
    }
  }

  // 2 rows; channel counts 9 through 15.
  TEST(F16_PRELU__F16C_2X8, channels_gt_8) {
    TEST_REQUIRES_X86_F16C;
    for (size_t channels = 9; channels < 16; channels++) {
      PReLUMicrokernelTester()
        .rows(2)
        .channels(channels)
        .Test(xnn_f16_prelu_ukernel__f16c_2x8);
    }
  }

  // A single row (the loop only visits rows == 1); channel counts
  // 1, 8, 15, 22, 29, 36.
  TEST(F16_PRELU__F16C_2X8, rows_lt_2) {
    TEST_REQUIRES_X86_F16C;
    for (size_t rows = 1; rows < 2; rows++) {
      for (size_t channels = 1; channels <= 40; channels += 7) {
        PReLUMicrokernelTester()
          .rows(rows)
          .channels(channels)
          .Test(xnn_f16_prelu_ukernel__f16c_2x8);
      }
    }
  }

  // Row counts 4, 6, 8; channel counts 1, 8, 15, 22, 29, 36.
  TEST(F16_PRELU__F16C_2X8, rows_div_2) {
    TEST_REQUIRES_X86_F16C;
    for (size_t rows = 4; rows <= 8; rows += 2) {
      for (size_t channels = 1; channels <= 40; channels += 7) {
        PReLUMicrokernelTester()
          .rows(rows)
          .channels(channels)
          .Test(xnn_f16_prelu_ukernel__f16c_2x8);
      }
    }
  }

  // Three rows (the loop only visits rows == 3); channel counts
  // 1, 8, 15, 22, 29, 36.
  TEST(F16_PRELU__F16C_2X8, rows_gt_2) {
    TEST_REQUIRES_X86_F16C;
    for (size_t rows = 3; rows < 4; rows++) {
      for (size_t channels = 1; channels <= 40; channels += 7) {
        PReLUMicrokernelTester()
          .rows(rows)
          .channels(channels)
          .Test(xnn_f16_prelu_ukernel__f16c_2x8);
      }
    }
  }

  // Row counts 1 through 6 with input_stride(43), which exceeds the largest
  // channel count used here (36); the tester is set to iterations(1).
  TEST(F16_PRELU__F16C_2X8, input_stride) {
    TEST_REQUIRES_X86_F16C;
    for (size_t rows = 1; rows <= 6; rows += 1) {
      for (size_t channels = 1; channels <= 40; channels += 7) {
        PReLUMicrokernelTester()
          .rows(rows)
          .channels(channels)
          .input_stride(43)
          .iterations(1)
          .Test(xnn_f16_prelu_ukernel__f16c_2x8);
      }
    }
  }

  // Row counts 1 through 6 with output_stride(43), which exceeds the largest
  // channel count used here (36); the tester is set to iterations(1).
  TEST(F16_PRELU__F16C_2X8, output_stride) {
    TEST_REQUIRES_X86_F16C;
    for (size_t rows = 1; rows <= 6; rows += 1) {
      for (size_t channels = 1; channels <= 40; channels += 7) {
        PReLUMicrokernelTester()
          .rows(rows)
          .channels(channels)
          .output_stride(43)
          .iterations(1)
          .Test(xnn_f16_prelu_ukernel__f16c_2x8);
      }
    }
  }

  // Row counts 1 through 6 with inplace(true); the tester is set to
  // iterations(1).
  TEST(F16_PRELU__F16C_2X8, inplace) {
    TEST_REQUIRES_X86_F16C;
    for (size_t rows = 1; rows <= 6; rows += 1) {
      for (size_t channels = 1; channels <= 40; channels += 7) {
        PReLUMicrokernelTester()
          .rows(rows)
          .channels(channels)
          .inplace(true)
          .iterations(1)
          .Test(xnn_f16_prelu_ukernel__f16c_2x8);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
375 
376 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Tests for xnn_f16_prelu_ukernel__f16c_2x16, driven through
  // PReLUMicrokernelTester. Each test starts with TEST_REQUIRES_X86_F16C,
  // which is defined outside this file (NOTE(review): presumably skips the
  // test when the CPU lacks F16C -- confirm against xnnpack/isa-checks.h).

  // 2 rows, exactly 16 channels.
  TEST(F16_PRELU__F16C_2X16, channels_eq_16) {
    TEST_REQUIRES_X86_F16C;
    PReLUMicrokernelTester()
      .rows(2)
      .channels(16)
      .Test(xnn_f16_prelu_ukernel__f16c_2x16);
  }

  // 2 rows; channel counts 32, 48, ..., 144 (multiples of 16).
  TEST(F16_PRELU__F16C_2X16, channels_div_16) {
    TEST_REQUIRES_X86_F16C;
    for (size_t channels = 32; channels < 160; channels += 16) {
      PReLUMicrokernelTester()
        .rows(2)
        .channels(channels)
        .Test(xnn_f16_prelu_ukernel__f16c_2x16);
    }
  }

  // 2 rows; channel counts 1 through 15.
  TEST(F16_PRELU__F16C_2X16, channels_lt_16) {
    TEST_REQUIRES_X86_F16C;
    for (size_t channels = 1; channels < 16; channels++) {
      PReLUMicrokernelTester()
        .rows(2)
        .channels(channels)
        .Test(xnn_f16_prelu_ukernel__f16c_2x16);
    }
  }

  // 2 rows; channel counts 17 through 31.
  TEST(F16_PRELU__F16C_2X16, channels_gt_16) {
    TEST_REQUIRES_X86_F16C;
    for (size_t channels = 17; channels < 32; channels++) {
      PReLUMicrokernelTester()
        .rows(2)
        .channels(channels)
        .Test(xnn_f16_prelu_ukernel__f16c_2x16);
    }
  }

  // A single row (the loop only visits rows == 1); channel counts
  // 1, 16, 31, 46, 61, 76.
  TEST(F16_PRELU__F16C_2X16, rows_lt_2) {
    TEST_REQUIRES_X86_F16C;
    for (size_t rows = 1; rows < 2; rows++) {
      for (size_t channels = 1; channels <= 80; channels += 15) {
        PReLUMicrokernelTester()
          .rows(rows)
          .channels(channels)
          .Test(xnn_f16_prelu_ukernel__f16c_2x16);
      }
    }
  }

  // Row counts 4, 6, 8; channel counts 1, 16, 31, 46, 61, 76.
  TEST(F16_PRELU__F16C_2X16, rows_div_2) {
    TEST_REQUIRES_X86_F16C;
    for (size_t rows = 4; rows <= 8; rows += 2) {
      for (size_t channels = 1; channels <= 80; channels += 15) {
        PReLUMicrokernelTester()
          .rows(rows)
          .channels(channels)
          .Test(xnn_f16_prelu_ukernel__f16c_2x16);
      }
    }
  }

  // Three rows (the loop only visits rows == 3); channel counts
  // 1, 16, 31, 46, 61, 76.
  TEST(F16_PRELU__F16C_2X16, rows_gt_2) {
    TEST_REQUIRES_X86_F16C;
    for (size_t rows = 3; rows < 4; rows++) {
      for (size_t channels = 1; channels <= 80; channels += 15) {
        PReLUMicrokernelTester()
          .rows(rows)
          .channels(channels)
          .Test(xnn_f16_prelu_ukernel__f16c_2x16);
      }
    }
  }

  // Row counts 1 through 6 with input_stride(83), which exceeds the largest
  // channel count used here (76); the tester is set to iterations(1).
  TEST(F16_PRELU__F16C_2X16, input_stride) {
    TEST_REQUIRES_X86_F16C;
    for (size_t rows = 1; rows <= 6; rows += 1) {
      for (size_t channels = 1; channels <= 80; channels += 15) {
        PReLUMicrokernelTester()
          .rows(rows)
          .channels(channels)
          .input_stride(83)
          .iterations(1)
          .Test(xnn_f16_prelu_ukernel__f16c_2x16);
      }
    }
  }

  // Row counts 1 through 6 with output_stride(83), which exceeds the largest
  // channel count used here (76); the tester is set to iterations(1).
  TEST(F16_PRELU__F16C_2X16, output_stride) {
    TEST_REQUIRES_X86_F16C;
    for (size_t rows = 1; rows <= 6; rows += 1) {
      for (size_t channels = 1; channels <= 80; channels += 15) {
        PReLUMicrokernelTester()
          .rows(rows)
          .channels(channels)
          .output_stride(83)
          .iterations(1)
          .Test(xnn_f16_prelu_ukernel__f16c_2x16);
      }
    }
  }

  // Row counts 1 through 6 with inplace(true); the tester is set to
  // iterations(1).
  TEST(F16_PRELU__F16C_2X16, inplace) {
    TEST_REQUIRES_X86_F16C;
    for (size_t rows = 1; rows <= 6; rows += 1) {
      for (size_t channels = 1; channels <= 80; channels += 15) {
        PReLUMicrokernelTester()
          .rows(rows)
          .channels(channels)
          .inplace(true)
          .iterations(1)
          .Test(xnn_f16_prelu_ukernel__f16c_2x16);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
494